summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Documentation/networking/dccp.txt8
-rw-r--r--Documentation/power/interface.txt15
-rw-r--r--Documentation/sysctl/vm.txt27
-rw-r--r--MAINTAINERS24
-rw-r--r--arch/alpha/Kconfig2
-rw-r--r--arch/alpha/mm/init.c2
-rw-r--r--arch/arm/mm/ioremap.c2
-rw-r--r--arch/avr32/Kconfig196
-rw-r--r--arch/avr32/Kconfig.debug19
-rw-r--r--arch/avr32/Makefile84
-rw-r--r--arch/avr32/boards/atstk1000/Makefile2
-rw-r--r--arch/avr32/boards/atstk1000/atstk1002.c37
-rw-r--r--arch/avr32/boards/atstk1000/flash.c95
-rw-r--r--arch/avr32/boards/atstk1000/setup.c59
-rw-r--r--arch/avr32/boards/atstk1000/spi.c27
-rw-r--r--arch/avr32/boot/images/Makefile62
-rw-r--r--arch/avr32/boot/u-boot/Makefile3
-rw-r--r--arch/avr32/boot/u-boot/empty.S1
-rw-r--r--arch/avr32/boot/u-boot/head.S60
-rw-r--r--arch/avr32/configs/atstk1002_defconfig754
-rw-r--r--arch/avr32/kernel/Makefile18
-rw-r--r--arch/avr32/kernel/asm-offsets.c25
-rw-r--r--arch/avr32/kernel/avr32_ksyms.c55
-rw-r--r--arch/avr32/kernel/cpu.c327
-rw-r--r--arch/avr32/kernel/entry-avr32b.S678
-rw-r--r--arch/avr32/kernel/head.S45
-rw-r--r--arch/avr32/kernel/init_task.c38
-rw-r--r--arch/avr32/kernel/irq.c71
-rw-r--r--arch/avr32/kernel/kprobes.c270
-rw-r--r--arch/avr32/kernel/module.c324
-rw-r--r--arch/avr32/kernel/process.c276
-rw-r--r--arch/avr32/kernel/ptrace.c371
-rw-r--r--arch/avr32/kernel/semaphore.c148
-rw-r--r--arch/avr32/kernel/setup.c335
-rw-r--r--arch/avr32/kernel/signal.c328
-rw-r--r--arch/avr32/kernel/switch_to.S35
-rw-r--r--arch/avr32/kernel/sys_avr32.c51
-rw-r--r--arch/avr32/kernel/syscall-stubs.S102
-rw-r--r--arch/avr32/kernel/syscall_table.S289
-rw-r--r--arch/avr32/kernel/time.c238
-rw-r--r--arch/avr32/kernel/traps.c425
-rw-r--r--arch/avr32/kernel/vmlinux.lds.c139
-rw-r--r--arch/avr32/lib/Makefile10
-rw-r--r--arch/avr32/lib/__avr32_asr64.S31
-rw-r--r--arch/avr32/lib/__avr32_lsl64.S31
-rw-r--r--arch/avr32/lib/__avr32_lsr64.S31
-rw-r--r--arch/avr32/lib/clear_user.S76
-rw-r--r--arch/avr32/lib/copy_user.S119
-rw-r--r--arch/avr32/lib/csum_partial.S47
-rw-r--r--arch/avr32/lib/csum_partial_copy_generic.S99
-rw-r--r--arch/avr32/lib/delay.c55
-rw-r--r--arch/avr32/lib/findbit.S154
-rw-r--r--arch/avr32/lib/io-readsl.S24
-rw-r--r--arch/avr32/lib/io-readsw.S43
-rw-r--r--arch/avr32/lib/io-writesl.S20
-rw-r--r--arch/avr32/lib/io-writesw.S38
-rw-r--r--arch/avr32/lib/libgcc.h33
-rw-r--r--arch/avr32/lib/longlong.h98
-rw-r--r--arch/avr32/lib/memcpy.S62
-rw-r--r--arch/avr32/lib/memset.S72
-rw-r--r--arch/avr32/lib/strncpy_from_user.S60
-rw-r--r--arch/avr32/lib/strnlen_user.S67
-rw-r--r--arch/avr32/mach-at32ap/Makefile2
-rw-r--r--arch/avr32/mach-at32ap/at32ap.c90
-rw-r--r--arch/avr32/mach-at32ap/at32ap7000.c876
-rw-r--r--arch/avr32/mach-at32ap/clock.c148
-rw-r--r--arch/avr32/mach-at32ap/clock.h30
-rw-r--r--arch/avr32/mach-at32ap/extint.c171
-rw-r--r--arch/avr32/mach-at32ap/hsmc.c164
-rw-r--r--arch/avr32/mach-at32ap/hsmc.h127
-rw-r--r--arch/avr32/mach-at32ap/intc.c133
-rw-r--r--arch/avr32/mach-at32ap/intc.h327
-rw-r--r--arch/avr32/mach-at32ap/pio.c118
-rw-r--r--arch/avr32/mach-at32ap/pio.h178
-rw-r--r--arch/avr32/mach-at32ap/sm.c289
-rw-r--r--arch/avr32/mach-at32ap/sm.h240
-rw-r--r--arch/avr32/mm/Makefile6
-rw-r--r--arch/avr32/mm/cache.c150
-rw-r--r--arch/avr32/mm/clear_page.S25
-rw-r--r--arch/avr32/mm/copy_page.S28
-rw-r--r--arch/avr32/mm/dma-coherent.c139
-rw-r--r--arch/avr32/mm/fault.c315
-rw-r--r--arch/avr32/mm/init.c480
-rw-r--r--arch/avr32/mm/ioremap.c197
-rw-r--r--arch/avr32/mm/tlb.c378
-rw-r--r--arch/frv/Kconfig12
-rw-r--r--arch/frv/kernel/Makefile5
-rw-r--r--arch/frv/kernel/irq-mb93091.c157
-rw-r--r--arch/frv/kernel/irq-mb93093.c115
-rw-r--r--arch/frv/kernel/irq-mb93493.c160
-rw-r--r--arch/frv/kernel/irq-routing.c291
-rw-r--r--arch/frv/kernel/irq.c741
-rw-r--r--arch/frv/kernel/setup.c1
-rw-r--r--arch/frv/kernel/time.c1
-rw-r--r--arch/frv/mb93090-mb00/pci-irq.c1
-rw-r--r--arch/frv/mm/init.c2
-rw-r--r--arch/h8300/mm/init.c2
-rw-r--r--arch/i386/Kconfig4
-rw-r--r--arch/i386/kernel/apm.c26
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c4
-rw-r--r--arch/i386/kernel/efi_stub.S1
-rw-r--r--arch/i386/kernel/reboot.c12
-rw-r--r--arch/i386/kernel/setup.c23
-rw-r--r--arch/i386/kernel/smp.c66
-rw-r--r--arch/i386/kernel/smpboot.c6
-rw-r--r--arch/i386/kernel/srat.c5
-rw-r--r--arch/i386/kernel/time.c50
-rw-r--r--arch/i386/kernel/time_hpet.c37
-rw-r--r--arch/i386/kernel/traps.c11
-rw-r--r--arch/i386/kernel/vmlinux.lds.S12
-rw-r--r--arch/i386/mach-voyager/voyager_thread.c1
-rw-r--r--arch/i386/mm/boot_ioremap.c7
-rw-r--r--arch/i386/mm/discontig.c33
-rw-r--r--arch/i386/mm/init.c44
-rw-r--r--arch/i386/mm/pgtable.c30
-rw-r--r--arch/i386/power/swsusp.S2
-rw-r--r--arch/ia64/Kconfig9
-rw-r--r--arch/ia64/kernel/acpi.c13
-rw-r--r--arch/ia64/kernel/numa.c34
-rw-r--r--arch/ia64/kernel/perfmon.c1
-rw-r--r--arch/ia64/kernel/topology.c6
-rw-r--r--arch/ia64/kernel/uncached.c2
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c3
-rw-r--r--arch/m32r/mm/init.c2
-rw-r--r--arch/m68knommu/mm/init.c2
-rw-r--r--arch/mips/au1000/common/dbdma.c10
-rw-r--r--arch/mips/mm/init.c4
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c3
-rw-r--r--arch/parisc/mm/init.c4
-rw-r--r--arch/powerpc/kernel/swsusp_32.S4
-rw-r--r--arch/s390/appldata/appldata_mem.c3
-rw-r--r--arch/s390/mm/cmm.c205
-rw-r--r--arch/sh/mm/cache-sh7705.c2
-rw-r--r--arch/sh64/mm/init.c2
-rw-r--r--arch/sparc/mm/srmmu.c2
-rw-r--r--arch/sparc/mm/sun4c.c2
-rw-r--r--arch/sparc64/solaris/misc.c20
-rw-r--r--arch/sparc64/solaris/socksys.c6
-rw-r--r--arch/um/drivers/chan_kern.c2
-rw-r--r--arch/um/drivers/mconsole_kern.c2
-rw-r--r--arch/um/drivers/mconsole_user.c7
-rw-r--r--arch/um/drivers/net_kern.c14
-rw-r--r--arch/um/drivers/pcap_kern.c2
-rw-r--r--arch/um/include/kern_util.h1
-rw-r--r--arch/um/include/longjmp.h5
-rw-r--r--arch/um/include/net_user.h1
-rw-r--r--arch/um/include/os.h5
-rw-r--r--arch/um/include/registers.h3
-rw-r--r--arch/um/include/sysdep-i386/archsetjmp.h19
-rw-r--r--arch/um/include/sysdep-i386/signal.h27
-rw-r--r--arch/um/include/sysdep-x86_64/archsetjmp.h21
-rw-r--r--arch/um/include/sysdep-x86_64/signal.h29
-rw-r--r--arch/um/kernel/exec.c4
-rw-r--r--arch/um/kernel/irq.c34
-rw-r--r--arch/um/kernel/mem.c7
-rw-r--r--arch/um/kernel/process_kern.c3
-rw-r--r--arch/um/kernel/reboot.c13
-rw-r--r--arch/um/kernel/skas/mmu.c2
-rw-r--r--arch/um/kernel/time.c12
-rw-r--r--arch/um/kernel/tlb.c370
-rw-r--r--arch/um/kernel/trap.c11
-rw-r--r--arch/um/os-Linux/helper.c22
-rw-r--r--arch/um/os-Linux/irq.c2
-rw-r--r--arch/um/os-Linux/main.c34
-rw-r--r--arch/um/os-Linux/mem.c6
-rw-r--r--arch/um/os-Linux/process.c13
-rw-r--r--arch/um/os-Linux/sigio.c103
-rw-r--r--arch/um/os-Linux/signal.c38
-rw-r--r--arch/um/os-Linux/skas/process.c26
-rw-r--r--arch/um/os-Linux/start_up.c1
-rw-r--r--arch/um/os-Linux/sys-i386/Makefile2
-rw-r--r--arch/um/os-Linux/sys-i386/registers.c17
-rw-r--r--arch/um/os-Linux/sys-i386/signal.c15
-rw-r--r--arch/um/os-Linux/sys-x86_64/Makefile2
-rw-r--r--arch/um/os-Linux/sys-x86_64/registers.c17
-rw-r--r--arch/um/os-Linux/sys-x86_64/signal.c16
-rw-r--r--arch/um/os-Linux/time.c22
-rw-r--r--arch/um/os-Linux/trap.c1
-rw-r--r--arch/um/os-Linux/uaccess.c3
-rw-r--r--arch/um/os-Linux/util.c5
-rw-r--r--arch/um/sys-i386/Makefile2
-rw-r--r--arch/um/sys-i386/bugs.c9
-rw-r--r--arch/um/sys-i386/ldt.c3
-rw-r--r--arch/um/sys-i386/ptrace_user.c5
-rw-r--r--arch/um/sys-i386/setjmp.S58
-rw-r--r--arch/um/sys-x86_64/Makefile4
-rw-r--r--arch/um/sys-x86_64/setjmp.S54
-rw-r--r--arch/x86_64/Kconfig4
-rw-r--r--arch/x86_64/kernel/e820.c48
-rw-r--r--arch/x86_64/kernel/setup.c1
-rw-r--r--arch/x86_64/kernel/smpboot.c3
-rw-r--r--arch/x86_64/kernel/suspend_asm.S2
-rw-r--r--arch/x86_64/kernel/time.c37
-rw-r--r--arch/x86_64/mm/fault.c6
-rw-r--r--arch/x86_64/mm/init.c2
-rw-r--r--drivers/ata/ata_piix.c2
-rw-r--r--drivers/ata/sata_nv.c6
-rw-r--r--drivers/ata/sata_sis.c6
-rw-r--r--drivers/ata/sata_uli.c6
-rw-r--r--drivers/ata/sata_via.c7
-rw-r--r--drivers/atm/he.c16
-rw-r--r--drivers/base/node.c13
-rw-r--r--drivers/char/rtc.c5
-rw-r--r--drivers/ide/mips/au1xxx-ide.c4
-rw-r--r--drivers/media/video/videodev.c2
-rw-r--r--drivers/mmc/au1xmmc.c2
-rw-r--r--drivers/net/sunlance.c10
-rw-r--r--drivers/serial/serial_core.c14
-rw-r--r--drivers/video/fbsysfs.c12
-rw-r--r--fs/autofs4/expire.c6
-rw-r--r--fs/binfmt_elf.c10
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/jbd/commit.c182
-rw-r--r--fs/proc/proc_misc.c11
-rw-r--r--include/asm-alpha/mmzone.h1
-rw-r--r--include/asm-alpha/pgtable.h9
-rw-r--r--include/asm-arm/pgtable.h8
-rw-r--r--include/asm-arm26/pgtable.h8
-rw-r--r--include/asm-avr32/Kbuild3
-rw-r--r--include/asm-avr32/a.out.h26
-rw-r--r--include/asm-avr32/addrspace.h43
-rw-r--r--include/asm-avr32/arch-at32ap/at91rm9200_pdc.h36
-rw-r--r--include/asm-avr32/arch-at32ap/at91rm9200_usart.h123
-rw-r--r--include/asm-avr32/arch-at32ap/board.h35
-rw-r--r--include/asm-avr32/arch-at32ap/init.h21
-rw-r--r--include/asm-avr32/arch-at32ap/portmux.h16
-rw-r--r--include/asm-avr32/arch-at32ap/sm.h27
-rw-r--r--include/asm-avr32/arch-at32ap/smc.h60
-rw-r--r--include/asm-avr32/asm.h102
-rw-r--r--include/asm-avr32/atomic.h201
-rw-r--r--include/asm-avr32/auxvec.h4
-rw-r--r--include/asm-avr32/bitops.h296
-rw-r--r--include/asm-avr32/bug.h47
-rw-r--r--include/asm-avr32/bugs.h15
-rw-r--r--include/asm-avr32/byteorder.h25
-rw-r--r--include/asm-avr32/cache.h29
-rw-r--r--include/asm-avr32/cachectl.h11
-rw-r--r--include/asm-avr32/cacheflush.h129
-rw-r--r--include/asm-avr32/checksum.h156
-rw-r--r--include/asm-avr32/cputime.h6
-rw-r--r--include/asm-avr32/current.h15
-rw-r--r--include/asm-avr32/delay.h26
-rw-r--r--include/asm-avr32/div64.h6
-rw-r--r--include/asm-avr32/dma-mapping.h320
-rw-r--r--include/asm-avr32/dma.h8
-rw-r--r--include/asm-avr32/elf.h110
-rw-r--r--include/asm-avr32/emergency-restart.h6
-rw-r--r--include/asm-avr32/errno.h6
-rw-r--r--include/asm-avr32/fcntl.h6
-rw-r--r--include/asm-avr32/futex.h6
-rw-r--r--include/asm-avr32/hardirq.h34
-rw-r--r--include/asm-avr32/hw_irq.h9
-rw-r--r--include/asm-avr32/intc.h128
-rw-r--r--include/asm-avr32/io.h253
-rw-r--r--include/asm-avr32/ioctl.h6
-rw-r--r--include/asm-avr32/ioctls.h83
-rw-r--r--include/asm-avr32/ipcbuf.h29
-rw-r--r--include/asm-avr32/irq.h10
-rw-r--r--include/asm-avr32/irqflags.h68
-rw-r--r--include/asm-avr32/kdebug.h38
-rw-r--r--include/asm-avr32/kmap_types.h30
-rw-r--r--include/asm-avr32/kprobes.h34
-rw-r--r--include/asm-avr32/linkage.h7
-rw-r--r--include/asm-avr32/local.h6
-rw-r--r--include/asm-avr32/mach/serial_at91.h33
-rw-r--r--include/asm-avr32/mman.h17
-rw-r--r--include/asm-avr32/mmu.h10
-rw-r--r--include/asm-avr32/mmu_context.h148
-rw-r--r--include/asm-avr32/module.h28
-rw-r--r--include/asm-avr32/msgbuf.h31
-rw-r--r--include/asm-avr32/mutex.h9
-rw-r--r--include/asm-avr32/namei.h7
-rw-r--r--include/asm-avr32/numnodes.h7
-rw-r--r--include/asm-avr32/ocd.h78
-rw-r--r--include/asm-avr32/page.h112
-rw-r--r--include/asm-avr32/param.h23
-rw-r--r--include/asm-avr32/pci.h8
-rw-r--r--include/asm-avr32/percpu.h6
-rw-r--r--include/asm-avr32/pgalloc.h96
-rw-r--r--include/asm-avr32/pgtable-2level.h47
-rw-r--r--include/asm-avr32/pgtable.h408
-rw-r--r--include/asm-avr32/poll.h27
-rw-r--r--include/asm-avr32/posix_types.h129
-rw-r--r--include/asm-avr32/processor.h147
-rw-r--r--include/asm-avr32/ptrace.h154
-rw-r--r--include/asm-avr32/resource.h6
-rw-r--r--include/asm-avr32/scatterlist.h21
-rw-r--r--include/asm-avr32/sections.h6
-rw-r--r--include/asm-avr32/semaphore.h109
-rw-r--r--include/asm-avr32/sembuf.h25
-rw-r--r--include/asm-avr32/setup.h141
-rw-r--r--include/asm-avr32/shmbuf.h42
-rw-r--r--include/asm-avr32/shmparam.h6
-rw-r--r--include/asm-avr32/sigcontext.h34
-rw-r--r--include/asm-avr32/siginfo.h6
-rw-r--r--include/asm-avr32/signal.h168
-rw-r--r--include/asm-avr32/socket.h53
-rw-r--r--include/asm-avr32/sockios.h12
-rw-r--r--include/asm-avr32/stat.h79
-rw-r--r--include/asm-avr32/statfs.h6
-rw-r--r--include/asm-avr32/string.h17
-rw-r--r--include/asm-avr32/sysreg.h332
-rw-r--r--include/asm-avr32/system.h155
-rw-r--r--include/asm-avr32/termbits.h173
-rw-r--r--include/asm-avr32/termios.h80
-rw-r--r--include/asm-avr32/thread_info.h106
-rw-r--r--include/asm-avr32/timex.h40
-rw-r--r--include/asm-avr32/tlb.h32
-rw-r--r--include/asm-avr32/tlbflush.h40
-rw-r--r--include/asm-avr32/topology.h6
-rw-r--r--include/asm-avr32/traps.h23
-rw-r--r--include/asm-avr32/types.h70
-rw-r--r--include/asm-avr32/uaccess.h335
-rw-r--r--include/asm-avr32/ucontext.h12
-rw-r--r--include/asm-avr32/unaligned.h25
-rw-r--r--include/asm-avr32/unistd.h387
-rw-r--r--include/asm-avr32/user.h65
-rw-r--r--include/asm-cris/pgtable.h4
-rw-r--r--include/asm-frv/bitops.h96
-rw-r--r--include/asm-frv/cpu-irqs.h54
-rw-r--r--include/asm-frv/hardirq.h5
-rw-r--r--include/asm-frv/irq-routing.h70
-rw-r--r--include/asm-frv/irq.h26
-rw-r--r--include/asm-frv/mb93091-fpga-irqs.h6
-rw-r--r--include/asm-frv/mb93093-fpga-irqs.h6
-rw-r--r--include/asm-frv/mb93493-irqs.h6
-rw-r--r--include/asm-frv/mb93493-regs.h2
-rw-r--r--include/asm-frv/pgtable.h8
-rw-r--r--include/asm-generic/4level-fixup.h4
-rw-r--r--include/asm-generic/percpu.h4
-rw-r--r--include/asm-generic/pgtable-nopmd.h2
-rw-r--r--include/asm-generic/pgtable-nopud.h2
-rw-r--r--include/asm-generic/pgtable.h3
-rw-r--r--include/asm-generic/vmlinux.lds.h3
-rw-r--r--include/asm-i386/Kbuild1
-rw-r--r--include/asm-i386/dma-mapping.h9
-rw-r--r--include/asm-i386/fixmap.h7
-rw-r--r--include/asm-i386/mmzone.h6
-rw-r--r--include/asm-i386/pgtable-2level.h3
-rw-r--r--include/asm-i386/pgtable-3level.h4
-rw-r--r--include/asm-i386/pgtable.h45
-rw-r--r--include/asm-i386/processor.h60
-rw-r--r--include/asm-i386/ptrace-abi.h39
-rw-r--r--include/asm-i386/ptrace.h35
-rw-r--r--include/asm-i386/sync_bitops.h156
-rw-r--r--include/asm-i386/system.h36
-rw-r--r--include/asm-ia64/numa.h4
-rw-r--r--include/asm-ia64/pgtable.h14
-rw-r--r--include/asm-ia64/smp.h2
-rw-r--r--include/asm-m32r/pgtable-2level.h6
-rw-r--r--include/asm-m32r/pgtable.h4
-rw-r--r--include/asm-m68k/motorola_pgtable.h1
-rw-r--r--include/asm-mips/mach-au1x00/au1xxx_dbdma.h6
-rw-r--r--include/asm-mips/pgtable-32.h4
-rw-r--r--include/asm-mips/pgtable-64.h10
-rw-r--r--include/asm-mips/pgtable.h2
-rw-r--r--include/asm-parisc/pgtable.h9
-rw-r--r--include/asm-powerpc/pgtable-4k.h5
-rw-r--r--include/asm-powerpc/pgtable.h11
-rw-r--r--include/asm-ppc/pgtable.h8
-rw-r--r--include/asm-s390/percpu.h20
-rw-r--r--include/asm-s390/pgtable.h10
-rw-r--r--include/asm-s390/processor.h2
-rw-r--r--include/asm-sh/pgtable-2level.h5
-rw-r--r--include/asm-sh/pgtable.h4
-rw-r--r--include/asm-sh64/pgtable.h6
-rw-r--r--include/asm-sparc/pgtable.h4
-rw-r--r--include/asm-sparc64/pgtable.h5
-rw-r--r--include/asm-um/pgtable-2level.h2
-rw-r--r--include/asm-um/pgtable-3level.h5
-rw-r--r--include/asm-um/pgtable.h4
-rw-r--r--include/asm-um/processor-generic.h4
-rw-r--r--include/asm-um/ptrace-generic.h14
-rw-r--r--include/asm-um/ptrace-x86_64.h4
-rw-r--r--include/asm-x86_64/Kbuild1
-rw-r--r--include/asm-x86_64/e820.h1
-rw-r--r--include/asm-x86_64/percpu.h12
-rw-r--r--include/asm-x86_64/pgtable.h16
-rw-r--r--include/asm-x86_64/ptrace-abi.h51
-rw-r--r--include/asm-x86_64/ptrace.h59
-rw-r--r--include/asm-x86_64/smp.h2
-rw-r--r--include/asm-xtensa/pgtable.h4
-rw-r--r--include/linux/bootmem.h100
-rw-r--r--include/linux/console.h5
-rw-r--r--include/linux/cpu.h8
-rw-r--r--include/linux/dccp.h14
-rw-r--r--include/linux/elf-em.h1
-rw-r--r--include/linux/elfnote.h90
-rw-r--r--include/linux/gfp.h36
-rw-r--r--include/linux/highmem.h5
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/kernel.h1
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/mm.h128
-rw-r--r--include/linux/mmzone.h120
-rw-r--r--include/linux/netfilter/Kbuild2
-rw-r--r--include/linux/page-flags.h35
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/percpu.h89
-rw-r--r--include/linux/resume-trace.h24
-rw-r--r--include/linux/rmap.h14
-rw-r--r--include/linux/selinux.h29
-rw-r--r--include/linux/slab.h29
-rw-r--r--include/linux/smp.h3
-rw-r--r--include/linux/suspend.h32
-rw-r--r--include/linux/swap.h12
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--include/linux/vmstat.h18
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/net/cipso_ipv4.h23
-rw-r--r--include/net/netlabel.h57
-rw-r--r--include/net/netlink.h20
-rw-r--r--kernel/audit.c14
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cpu.c138
-rw-r--r--kernel/cpuset.c6
-rw-r--r--kernel/irq/handle.c2
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/power/Kconfig11
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/disk.c7
-rw-r--r--kernel/power/main.c40
-rw-r--r--kernel/power/power.h59
-rw-r--r--kernel/power/smp.c62
-rw-r--r--kernel/power/snapshot.c1155
-rw-r--r--kernel/power/swap.c270
-rw-r--r--kernel/power/swsusp.c5
-rw-r--r--kernel/power/user.c15
-rw-r--r--kernel/printk.c3
-rw-r--r--kernel/profile.c16
-rw-r--r--kernel/sched.c54
-rw-r--r--kernel/sysctl.c11
-rw-r--r--lib/Kconfig.debug4
-rw-r--r--mm/Makefile2
-rw-r--r--mm/allocpercpu.c129
-rw-r--r--mm/bootmem.c202
-rw-r--r--mm/filemap.c25
-rw-r--r--mm/fremap.c4
-rw-r--r--mm/highmem.c13
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/internal.h4
-rw-r--r--mm/memory.c77
-rw-r--r--mm/mempolicy.c19
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mmap.c12
-rw-r--r--mm/mprotect.c51
-rw-r--r--mm/msync.c196
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c97
-rw-r--r--mm/page-writeback.c29
-rw-r--r--mm/page_alloc.c233
-rw-r--r--mm/page_io.c48
-rw-r--r--mm/rmap.c65
-rw-r--r--mm/shmem.c1
-rw-r--r--mm/slab.c310
-rw-r--r--mm/slob.c49
-rw-r--r--mm/swap.c49
-rw-r--r--mm/vmalloc.c8
-rw-r--r--mm/vmscan.c110
-rw-r--r--mm/vmstat.c49
-rw-r--r--net/dccp/Kconfig16
-rw-r--r--net/dccp/Makefile2
-rw-r--r--net/dccp/ccids/ccid2.c2
-rw-r--r--net/dccp/ccids/ccid3.c2
-rw-r--r--net/dccp/ipv4.c3
-rw-r--r--net/dccp/probe.c198
-rw-r--r--net/dccp/proto.c11
-rw-r--r--net/ipv4/Kconfig53
-rw-r--r--net/ipv4/cipso_ipv4.c267
-rw-r--r--net/ipv4/sysctl_net_ipv4.c6
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/netlabel/Kconfig5
-rw-r--r--net/netlabel/netlabel_cipso_v4.c628
-rw-r--r--net/netlabel/netlabel_cipso_v4.h225
-rw-r--r--net/netlabel/netlabel_domainhash.c183
-rw-r--r--net/netlabel/netlabel_domainhash.h6
-rw-r--r--net/netlabel/netlabel_kapi.c23
-rw-r--r--net/netlabel/netlabel_mgmt.c541
-rw-r--r--net/netlabel/netlabel_mgmt.h211
-rw-r--r--net/netlabel/netlabel_unlabeled.c79
-rw-r--r--net/netlabel/netlabel_unlabeled.h41
-rw-r--r--net/netlabel/netlabel_user.c82
-rw-r--r--net/netlabel/netlabel_user.h141
-rw-r--r--security/selinux/Kconfig37
-rw-r--r--security/selinux/exports.c13
-rw-r--r--security/selinux/hooks.c69
-rw-r--r--security/selinux/include/objsec.h4
-rw-r--r--security/selinux/include/security.h7
-rw-r--r--security/selinux/ss/mls.c21
-rw-r--r--security/selinux/ss/policydb.c27
-rw-r--r--security/selinux/ss/policydb.h7
-rw-r--r--security/selinux/ss/services.c30
-rw-r--r--sound/oss/au1550_ac97.c6
-rw-r--r--sound/sparc/amd7930.c20
497 files changed, 25494 insertions, 5470 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 71d05f481727..766abdab94e7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1363,6 +1363,11 @@ running once the system is up.
reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
+ reservetop= [IA-32]
+ Format: nn[KMG]
+ Reserves a hole at the top of the kernel virtual
+ address space.
+
resume= [SWSUSP]
Specify the partition device for software suspend
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index c45daabd3bfe..74563b38ffd9 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -1,7 +1,6 @@
DCCP protocol
============
-Last updated: 10 November 2005
Contents
========
@@ -42,8 +41,11 @@ Socket options
DCCP_SOCKOPT_PACKET_SIZE is used for CCID3 to set default packet size for
calculations.
-DCCP_SOCKOPT_SERVICE sets the service. This is compulsory as per the
-specification. If you don't set it you will get EPROTO.
+DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
+service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
+the socket will fall back to 0 (which means that no meaningful service code
+is present). Connecting sockets set at most one service option; for
+listening sockets, multiple service codes can be specified.
Notes
=====
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index 4117802af0f8..a66bec222b16 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -52,3 +52,18 @@ suspend image will be as small as possible.
Reading from this file will display the current image size limit, which
is set to 500 MB by default.
+
+/sys/power/pm_trace controls the code which saves the last PM event point in
+the RTC across reboots, so that you can debug a machine that just hangs
+during suspend (or more commonly, during resume). Namely, the RTC is only
+used to save the last PM event point if this file contains '1'. Initially it
+contains '0' which may be changed to '1' by writing a string representing a
+nonzero integer into it.
+
+To use this debugging feature you should attempt to suspend the machine, then
+reboot it and run
+
+ dmesg -s 1000000 | grep 'hash matches'
+
+CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
+set to a random invalid time after a resume.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 7cee90223d3a..20d0d797f539 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm:
- drop-caches
- zone_reclaim_mode
- min_unmapped_ratio
+- min_slab_ratio
- panic_on_oom
==============================================================
@@ -138,7 +139,6 @@ This is value ORed together of
1 = Zone reclaim on
2 = Zone reclaim writes dirty pages out
4 = Zone reclaim swaps pages
-8 = Also do a global slab reclaim pass
zone_reclaim_mode is set during bootup to 1 if it is determined that pages
from remote zones will cause a measurable performance reduction. The
@@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local
node unless explicitly overridden by memory policies or cpuset
configurations.
-It may be advisable to allow slab reclaim if the system makes heavy
-use of files and builds up large slab caches. However, the slab
-shrink operation is global, may take a long time and free slabs
-in all nodes of the system.
-
=============================================================
min_unmapped_ratio:
This is available only on NUMA kernels.
-A percentage of the file backed pages in each zone. Zone reclaim will only
+A percentage of the total pages in each zone. Zone reclaim will only
occur if more than this percentage of pages are file backed and unmapped.
This is to insure that a minimal amount of local pages is still available for
file I/O even if the node is overallocated.
@@ -182,6 +177,24 @@ The default is 1 percent.
=============================================================
+min_slab_ratio:
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone. On zone reclaim (that
+is, when fallback from the local zone occurs), slabs will be reclaimed
+if more than this percentage of pages in a zone are reclaimable slab
+pages. This ensures that slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+=============================================================
+
panic_on_oom
This enables or disables panic on out-of-memory feature. If this is set to 1,
diff --git a/MAINTAINERS b/MAINTAINERS
index bd446e251d5b..63673e6513b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,23 @@ W: http://people.redhat.com/sgrubb/audit/
T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git
S: Maintained
+AVR32 ARCHITECTURE
+P: Atmel AVR32 Support Team
+M: avr32@atmel.com
+P: Haavard Skinnemoen
+M: hskinnemoen@atmel.com
+W: http://www.atmel.com/products/AVR32/
+W: http://avr32linux.org/
+W: http://avrfreaks.net/
+S: Supported
+
+AVR32/AT32AP MACHINE SUPPORT
+P: Atmel AVR32 Support Team
+M: avr32@atmel.com
+P: Haavard Skinnemoen
+M: hskinnemoen@atmel.com
+S: Supported
+
AX.25 NETWORK LAYER
P: Ralf Baechle
M: ralf@linux-mips.org
@@ -2031,6 +2048,13 @@ L: netfilter@lists.netfilter.org
L: netfilter-devel@lists.netfilter.org
S: Supported
+NETLABEL
+P: Paul Moore
+M: paul.moore@hp.com
+W: http://netlabel.sf.net
+L: netdev@vger.kernel.org
+S: Supported
+
NETROM NETWORK LAYER
P: Ralf Baechle
M: ralf@linux-mips.org
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 213c7850d5fb..2b36afd8e969 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -381,7 +381,7 @@ config ALPHA_EV56
config ALPHA_EV56
prompt "EV56 CPU (speed >= 333MHz)?"
- depends on ALPHA_NORITAKE && ALPHA_PRIMO
+ depends on ALPHA_NORITAKE || ALPHA_PRIMO
config ALPHA_EV56
prompt "EV56 CPU (speed >= 400MHz)?"
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 917dad1b74c8..550f4907d613 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -270,7 +270,7 @@ callback_init(void * kernel_end)
void
paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
unsigned long dma_pfn, high_pfn;
dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 88a999df0ab3..591fc3187c7f 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -177,7 +177,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
* Free the page table, if there was one.
*/
if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
- pte_free_kernel(pmd_page_kernel(pmd));
+ pte_free_kernel(pmd_page_vaddr(pmd));
}
addr += PGDIR_SIZE;
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
new file mode 100644
index 000000000000..5f1694eea842
--- /dev/null
+++ b/arch/avr32/Kconfig
@@ -0,0 +1,196 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux Kernel Configuration"
+
+config AVR32
+ bool
+ default y
+ # With EMBEDDED=n, we get lots of stuff automatically selected
+ # that we usually don't need on AVR32.
+ select EMBEDDED
+ help
+ AVR32 is a high-performance 32-bit RISC microprocessor core,
+ designed for cost-sensitive embedded applications, with particular
+ emphasis on low power consumption and high code density.
+
+ There is an AVR32 Linux project with a web page at
+ http://avr32linux.org/.
+
+config UID16
+ bool
+
+config GENERIC_HARDIRQS
+ bool
+ default y
+
+config HARDIRQS_SW_RESEND
+ bool
+ default y
+
+config GENERIC_IRQ_PROBE
+ bool
+ default y
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ default y
+
+config GENERIC_TIME
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+
+config GENERIC_BUST_SPINLOCK
+ bool
+
+config GENERIC_HWEIGHT
+ bool
+ default y
+
+config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+source "init/Kconfig"
+
+menu "System Type and features"
+
+config SUBARCH_AVR32B
+ bool
+config MMU
+ bool
+config PERFORMANCE_COUNTERS
+ bool
+
+config PLATFORM_AT32AP
+ bool
+ select SUBARCH_AVR32B
+ select MMU
+ select PERFORMANCE_COUNTERS
+
+choice
+ prompt "AVR32 CPU type"
+ default CPU_AT32AP7000
+
+config CPU_AT32AP7000
+ bool "AT32AP7000"
+ select PLATFORM_AT32AP
+endchoice
+
+#
+# CPU Daughterboards for ATSTK1000
+config BOARD_ATSTK1002
+ bool
+
+choice
+ prompt "AVR32 board type"
+ default BOARD_ATSTK1000
+
+config BOARD_ATSTK1000
+ bool "ATSTK1000 evaluation board"
+ select BOARD_ATSTK1002 if CPU_AT32AP7000
+endchoice
+
+choice
+ prompt "Boot loader type"
+ default LOADER_U_BOOT
+
+config LOADER_U_BOOT
+ bool "U-Boot (or similar) bootloader"
+endchoice
+
+config LOAD_ADDRESS
+ hex
+ default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y
+
+config ENTRY_ADDRESS
+ hex
+ default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y
+
+config PHYS_OFFSET
+ hex
+ default 0x10000000 if CPU_AT32AP7000=y
+
+source "kernel/Kconfig.preempt"
+
+config HAVE_ARCH_BOOTMEM_NODE
+ bool
+ default n
+
+config ARCH_HAVE_MEMORY_PRESENT
+ bool
+ default n
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ default n
+
+config ARCH_FLATMEM_ENABLE
+ bool
+ default y
+
+config ARCH_DISCONTIGMEM_ENABLE
+ bool
+ default n
+
+config ARCH_SPARSEMEM_ENABLE
+ bool
+ default n
+
+source "mm/Kconfig"
+
+config OWNERSHIP_TRACE
+ bool "Ownership trace support"
+ default y
+ help
+ Say Y to generate an Ownership Trace message on every context switch,
+ enabling Nexus-compliant debuggers to keep track of the PID of the
+ currently executing task.
+
+# FPU emulation goes here
+
+source "kernel/Kconfig.hz"
+
+config CMDLINE
+ string "Default kernel command line"
+ default ""
+ help
+ If you don't have a boot loader capable of passing a command line string
+ to the kernel, you may specify one here. As a minimum, you should specify
+ the memory size and the root device (e.g., mem=8M, root=/dev/nfs).
+
+endmenu
+
+menu "Bus options"
+
+config PCI
+ bool
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+source "fs/Kconfig.binfmt"
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/avr32/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug
new file mode 100644
index 000000000000..64ace00fe6cb
--- /dev/null
+++ b/arch/avr32/Kconfig.debug
@@ -0,0 +1,19 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+ bool
+ default y
+
+source "lib/Kconfig.debug"
+
+config KPROBES
+ bool "Kprobes"
+ depends on DEBUG_KERNEL
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+
+endmenu
diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile
new file mode 100644
index 000000000000..cefc95a73980
--- /dev/null
+++ b/arch/avr32/Makefile
@@ -0,0 +1,84 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2004-2006 Atmel Corporation.
+
+# Default target when executing plain make
+.PHONY: all
+all: uImage vmlinux.elf linux.lst
+
+KBUILD_DEFCONFIG := atstk1002_defconfig
+
+CFLAGS += -pipe -fno-builtin -mno-pic
+AFLAGS += -mrelax -mno-pic
+CFLAGS_MODULE += -mno-relax
+LDFLAGS_vmlinux += --relax
+
+cpuflags-$(CONFIG_CPU_AT32AP7000) += -mcpu=ap7000
+
+CFLAGS += $(cpuflags-y)
+AFLAGS += $(cpuflags-y)
+
+CHECKFLAGS += -D__avr32__
+
+LIBGCC := $(shell $(CC) $(CFLAGS) -print-libgcc-file-name)
+
+head-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/head.o
+head-y += arch/avr32/kernel/head.o
+core-$(CONFIG_PLATFORM_AT32AP) += arch/avr32/mach-at32ap/
+core-$(CONFIG_BOARD_ATSTK1000) += arch/avr32/boards/atstk1000/
+core-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/
+core-y += arch/avr32/kernel/
+core-y += arch/avr32/mm/
+libs-y += arch/avr32/lib/ #$(LIBGCC)
+
+archincdir-$(CONFIG_PLATFORM_AT32AP) := arch-at32ap
+
+include/asm-avr32/.arch: $(wildcard include/config/platform/*.h) include/config/auto.conf
+ @echo ' SYMLINK include/asm-avr32/arch -> include/asm-avr32/$(archincdir-y)'
+ifneq ($(KBUILD_SRC),)
+ $(Q)mkdir -p include/asm-avr32
+ $(Q)ln -fsn $(srctree)/include/asm-avr32/$(archincdir-y) include/asm-avr32/arch
+else
+ $(Q)ln -fsn $(archincdir-y) include/asm-avr32/arch
+endif
+ @touch $@
+
+archprepare: include/asm-avr32/.arch
+
+BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec
+
+.PHONY: $(BOOT_TARGETS) install
+
+boot := arch/$(ARCH)/boot/images
+
+ KBUILD_IMAGE := $(boot)/uImage
+vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf
+vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso
+uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec
+uImage: KBUILD_IMAGE := $(boot)/uImage
+
+quiet_cmd_listing = LST $@
+ cmd_listing = avr32-linux-objdump $(OBJDUMPFLAGS) -lS $< > $@
+quiet_cmd_disasm = DIS $@
+ cmd_disasm = avr32-linux-objdump $(OBJDUMPFLAGS) -d $< > $@
+
+vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+install: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
+
+linux.s: vmlinux
+ $(call if_changed,disasm)
+
+linux.lst: vmlinux
+ $(call if_changed,listing)
+
+define archhelp
+ @echo '* vmlinux.elf - ELF image with load address 0'
+ @echo ' vmlinux.cso - PathFinder CSO image'
+ @echo ' uImage - Create a bootable image for U-Boot'
+endef
diff --git a/arch/avr32/boards/atstk1000/Makefile b/arch/avr32/boards/atstk1000/Makefile
new file mode 100644
index 000000000000..df9499480530
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/Makefile
@@ -0,0 +1,2 @@
+obj-y += setup.o spi.o flash.o
+obj-$(CONFIG_BOARD_ATSTK1002) += atstk1002.o
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
new file mode 100644
index 000000000000..49164e9aadd6
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -0,0 +1,37 @@
+/*
+ * ATSTK1002 daughterboard-specific init code
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+
+#include <asm/arch/board.h>
+
+struct eth_platform_data __initdata eth0_data = {
+ .valid = 1,
+ .mii_phy_addr = 0x10,
+ .is_rmii = 0,
+ .hw_addr = { 0x6a, 0x87, 0x71, 0x14, 0xcd, 0xcb },
+};
+
+extern struct lcdc_platform_data atstk1000_fb0_data;
+
+/*
+ * Register the devices used on the ATSTK1002 daughterboard: the system
+ * devices first, then USART 1-3, ethernet 0, SPI 0 and LCD controller 0.
+ *
+ * Always returns 0.
+ */
+static int __init atstk1002_init(void)
+{
+	int usart;
+
+	at32_add_system_devices();
+
+	/* USART 1, 2 and 3 become /dev/ttyS0, /dev/ttyS1 and /dev/ttyS2 */
+	for (usart = 1; usart <= 3; usart++)
+		at32_add_device_usart(usart);
+
+	at32_add_device_eth(0, &eth0_data);
+	at32_add_device_spi(0);
+	at32_add_device_lcdc(0, &atstk1000_fb0_data);
+
+	return 0;
+}
+postcore_initcall(atstk1002_init);
diff --git a/arch/avr32/boards/atstk1000/flash.c b/arch/avr32/boards/atstk1000/flash.c
new file mode 100644
index 000000000000..aac4300cca12
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/flash.c
@@ -0,0 +1,95 @@
+/*
+ * ATSTK1000 board-specific flash initialization
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/arch/smc.h>
+
+static struct smc_config flash_config __initdata = {
+ .ncs_read_setup = 0,
+ .nrd_setup = 40,
+ .ncs_write_setup = 0,
+ .nwe_setup = 10,
+
+ .ncs_read_pulse = 80,
+ .nrd_pulse = 40,
+ .ncs_write_pulse = 65,
+ .nwe_pulse = 55,
+
+ .read_cycle = 120,
+ .write_cycle = 120,
+
+ .bus_width = 2,
+ .nrd_controlled = 1,
+ .nwe_controlled = 1,
+ .byte_write = 1,
+};
+
+static struct mtd_partition flash_parts[] = {
+ {
+ .name = "u-boot",
+ .offset = 0x00000000,
+ .size = 0x00020000, /* 128 KiB */
+ .mask_flags = MTD_WRITEABLE,
+ },
+ {
+ .name = "root",
+ .offset = 0x00020000,
+ .size = 0x007d0000,
+ },
+ {
+ .name = "env",
+ .offset = 0x007f0000,
+ .size = 0x00010000,
+ .mask_flags = MTD_WRITEABLE,
+ },
+};
+
+static struct physmap_flash_data flash_data = {
+ .width = 2,
+ .nr_parts = ARRAY_SIZE(flash_parts),
+ .parts = flash_parts,
+};
+
+static struct resource flash_resource = {
+ .start = 0x00000000,
+ .end = 0x007fffff,
+ .flags = IORESOURCE_MEM,
+};
+
+static struct platform_device flash_device = {
+ .name = "physmap-flash",
+ .id = 0,
+ .resource = &flash_resource,
+ .num_resources = 1,
+ .dev = {
+ .platform_data = &flash_data,
+ },
+};
+
+/*
+ * Apply the NOR flash timing in flash_config to the SMC and register the
+ * "physmap-flash" platform device.
+ *
+ * This needs to be called after the SMC has been initialized.
+ *
+ * Returns 0 on success, or the negative error code reported by
+ * smc_set_configuration() or platform_device_register().
+ */
+static int __init atstk1000_flash_init(void)
+{
+	int ret;
+
+	ret = smc_set_configuration(0, &flash_config);
+	if (ret < 0) {
+		printk(KERN_ERR "atstk1000: failed to set NOR flash timing\n");
+		return ret;
+	}
+
+	/* Report a failed registration instead of pretending it worked */
+	ret = platform_device_register(&flash_device);
+	if (ret < 0)
+		printk(KERN_ERR "atstk1000: failed to register NOR flash device\n");
+
+	return ret;
+}
+device_initcall(atstk1000_flash_init);
diff --git a/arch/avr32/boards/atstk1000/setup.c b/arch/avr32/boards/atstk1000/setup.c
new file mode 100644
index 000000000000..191ab85de9a3
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/setup.c
@@ -0,0 +1,59 @@
+/*
+ * ATSTK1000 board-specific setup code.
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/linkage.h>
+
+#include <asm/setup.h>
+
+#include <asm/arch/board.h>
+
+/* Initialized by bootloader-specific startup code. */
+struct tag *bootloader_tags __initdata;
+
+struct lcdc_platform_data __initdata atstk1000_fb0_data;
+
+/*
+ * Board-specific early initialization. Only kernels built with
+ * CONFIG_LOADER_STANDALONE do anything here: they call sdram_init().
+ */
+asmlinkage void __init board_early_init(void)
+{
+#ifdef CONFIG_LOADER_STANDALONE
+	extern void sdram_init(void);
+
+	sdram_init();
+#endif
+}
+
+/*
+ * Set aside memory for the framebuffer and record it in
+ * atstk1000_fb0_data.
+ *
+ * @fbmem_start: physical start address of the framebuffer, or 0 to have
+ *               the memory allocated from bootmem
+ * @fbmem_size:  framebuffer size in bytes; nothing is done when it is 0
+ */
+void __init board_setup_fbmem(unsigned long fbmem_start,
+			      unsigned long fbmem_size)
+{
+	if (!fbmem_size)
+		return;
+
+	if (!fbmem_start) {
+		void *fbmem;
+
+		fbmem = alloc_bootmem_low_pages(fbmem_size);
+		fbmem_start = __pa(fbmem);
+	} else {
+		pg_data_t *pgdat;
+
+		for_each_online_pgdat(pgdat) {
+			/*
+			 * node_boot_start is a physical address, but
+			 * node_low_pfn is a page frame number: convert
+			 * fbmem_start to a pfn before checking the upper
+			 * bound, otherwise the region is never reserved.
+			 */
+			if (fbmem_start >= pgdat->bdata->node_boot_start
+			    && (fbmem_start >> PAGE_SHIFT)
+					< pgdat->bdata->node_low_pfn)
+				reserve_bootmem_node(pgdat, fbmem_start,
+						     fbmem_size);
+		}
+	}
+
+	printk("%luKiB framebuffer memory at address 0x%08lx\n",
+	       fbmem_size >> 10, fbmem_start);
+	atstk1000_fb0_data.fbmem_start = fbmem_start;
+	atstk1000_fb0_data.fbmem_size = fbmem_size;
+}
diff --git a/arch/avr32/boards/atstk1000/spi.c b/arch/avr32/boards/atstk1000/spi.c
new file mode 100644
index 000000000000..567726c82c6e
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/spi.c
@@ -0,0 +1,27 @@
+/*
+ * ATSTK1000 SPI devices
+ *
+ * Copyright (C) 2005 Atmel Norway
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/device.h>
+#include <linux/spi/spi.h>
+
+static struct spi_board_info spi_board_info[] __initdata = {
+ {
+ .modalias = "ltv350qv",
+ .max_speed_hz = 16000000,
+ .bus_num = 0,
+ .chip_select = 1,
+ },
+};
+
+/*
+ * Hand the board's SPI device table to the SPI core.
+ *
+ * Marked __init because it references spi_board_info[], which is placed
+ * in __initdata; a non-init function must not reference init data.
+ *
+ * Returns the result of spi_register_board_info().
+ */
+static int __init board_init_spi(void)
+{
+	return spi_register_board_info(spi_board_info,
+				       ARRAY_SIZE(spi_board_info));
+}
+arch_initcall(board_init_spi);
diff --git a/arch/avr32/boot/images/Makefile b/arch/avr32/boot/images/Makefile
new file mode 100644
index 000000000000..ccd74eeecec3
--- /dev/null
+++ b/arch/avr32/boot/images/Makefile
@@ -0,0 +1,62 @@
+#
+# Copyright (C) 2004-2006 Atmel Corporation
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+
+MKIMAGE := $(srctree)/scripts/mkuboot.sh
+
+extra-y := vmlinux.bin vmlinux.gz
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+
+quiet_cmd_uimage = UIMAGE $@
+ cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A avr32 -O linux -T kernel \
+ -C gzip -a $(CONFIG_LOAD_ADDRESS) -e $(CONFIG_ENTRY_ADDRESS) \
+ -n 'Linux-$(KERNELRELEASE)' -d $< $@
+
+# Rules using if_changed need FORCE as a prerequisite (so the recipe is
+# always evaluated and the saved command line can be compared) and must be
+# listed in $(targets) (so the saved .cmd file is read back).
+targets += uImage uImage.srec
+$(obj)/uImage: $(obj)/vmlinux.gz FORCE
+	$(call if_changed,uimage)
+	@echo ' Image $@ is ready'
+
+OBJCOPYFLAGS_uImage.srec := -I binary -O srec
+$(obj)/uImage.srec: $(obj)/uImage FORCE
+	$(call if_changed,objcopy)
+
+OBJCOPYFLAGS_vmlinux.elf := --change-section-lma .text-0x80000000 \
+ --change-section-lma __ex_table-0x80000000 \
+ --change-section-lma .rodata-0x80000000 \
+ --change-section-lma .data-0x80000000 \
+ --change-section-lma .init-0x80000000 \
+ --change-section-lma .bss-0x80000000 \
+ --change-section-lma .initrd-0x80000000 \
+ --change-section-lma __param-0x80000000 \
+ --change-section-lma __ksymtab-0x80000000 \
+ --change-section-lma __ksymtab_gpl-0x80000000 \
+ --change-section-lma __kcrctab-0x80000000 \
+ --change-section-lma __kcrctab_gpl-0x80000000 \
+ --change-section-lma __ksymtab_strings-0x80000000 \
+ --change-section-lma .got-0x80000000 \
+ --set-start 0xa0000000
+$(obj)/vmlinux.elf: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+quiet_cmd_sfdwarf = SFDWARF $@
+ cmd_sfdwarf = sfdwarf $< TO $@ GNUAVR IW $(SFDWARF_FLAGS) > $(obj)/sfdwarf.log
+
+$(obj)/vmlinux.cso: $(obj)/vmlinux.elf FORCE
+ $(call if_changed,sfdwarf)
+
+install: $(BOOTIMAGE)
+ sh $(srctree)/install-kernel.sh $<
+
+# Generated files to be removed upon make clean
+clean-files := vmlinux* uImage uImage.srec
diff --git a/arch/avr32/boot/u-boot/Makefile b/arch/avr32/boot/u-boot/Makefile
new file mode 100644
index 000000000000..125ddc96c275
--- /dev/null
+++ b/arch/avr32/boot/u-boot/Makefile
@@ -0,0 +1,3 @@
+# head.o is listed in extra-y: kbuild builds it but does not link it into
+# this directory's built-in.o.
+extra-y := head.o
+
+# NOTE(review): empty.o presumably exists only so that obj-y is non-empty
+# for this directory -- confirm.
+obj-y := empty.o
diff --git a/arch/avr32/boot/u-boot/empty.S b/arch/avr32/boot/u-boot/empty.S
new file mode 100644
index 000000000000..8ac91a5f12f0
--- /dev/null
+++ b/arch/avr32/boot/u-boot/empty.S
@@ -0,0 +1 @@
+/* Empty file */
diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S
new file mode 100644
index 000000000000..4488fa27fe94
--- /dev/null
+++ b/arch/avr32/boot/u-boot/head.S
@@ -0,0 +1,60 @@
+/*
+ * Startup code for use with the u-boot bootloader.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/setup.h>
+
+ /*
+ * The kernel is loaded where we want it to be and all caches
+ * have just been flushed. We get two parameters from u-boot:
+ *
+ * r12 contains a magic number (ATAG_MAGIC)
+ * r11 points to a tag table providing information about
+ * the system.
+ */
+ .section .init.text,"ax"
+ .global _start
+_start:
+ /* Check if the boot loader actually provided a tag table */
+ lddpc r0, magic_number
+ cp.w r12, r0
+ brne no_tag_table
+
+ /* Initialize .bss */
+ lddpc r2, bss_start_addr
+ lddpc r3, end_addr
+ mov r0, 0
+ mov r1, 0
+1: st.d r2++, r0
+ cp r2, r3
+ brlo 1b
+
+ /*
+ * Save the tag table address for later use. This must be done
+ * _after_ .bss has been initialized...
+ */
+ lddpc r0, tag_table_addr
+ st.w r0[0], r11
+
+ /* Jump to loader-independent setup code */
+ rjmp kernel_entry
+
+ .align 2
+magic_number:
+ .long ATAG_MAGIC
+tag_table_addr:
+ .long bootloader_tags
+bss_start_addr:
+ .long __bss_start
+end_addr:
+ .long _end
+
+ /*
+ * Reached when r12 did not match ATAG_MAGIC. Note that .bss has not
+ * been cleared on this path.
+ */
+no_tag_table:
+ /* r12 = address of the message at label 2 below, computed pc-relative */
+ sub r12, pc, (. - 2f)
+ /* NOTE(review): presumably r12 is the first argument to panic() -- confirm */
+ bral panic
+2: .asciz "Boot loader didn't provide correct magic number\n"
diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig
new file mode 100644
index 000000000000..1d22255009fd
--- /dev/null
+++ b/arch/avr32/configs/atstk1002_defconfig
@@ -0,0 +1,754 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.18-rc1
+# Tue Jul 11 12:41:36 2006
+#
+CONFIG_AVR32=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+# CONFIG_RELAY is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+CONFIG_SHMEM=y
+# CONFIG_SLAB is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=1
+CONFIG_SLOB=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+
+#
+# Block layer
+#
+# CONFIG_BLK_DEV_IO_TRACE is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+
+#
+# System Type and features
+#
+CONFIG_SUBARCH_AVR32B=y
+CONFIG_MMU=y
+CONFIG_PERFORMANCE_COUNTERS=y
+CONFIG_PLATFORM_AT32AP=y
+CONFIG_CPU_AT32AP7000=y
+CONFIG_BOARD_ATSTK1002=y
+CONFIG_BOARD_ATSTK1000=y
+CONFIG_LOADER_U_BOOT=y
+CONFIG_LOAD_ADDRESS=0x10000000
+CONFIG_ENTRY_ADDRESS=0x90000000
+CONFIG_PHYS_OFFSET=0x10000000
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_HAVE_ARCH_BOOTMEM_NODE is not set
+# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set
+# CONFIG_NEED_NODE_MEMMAP_SIZE is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+# CONFIG_ARCH_SPARSEMEM_ENABLE is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+# CONFIG_OWNERSHIP_TRACE is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_CMDLINE=""
+
+#
+# Bus options
+#
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+
+#
+# I2O device support
+#
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=m
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+CONFIG_MACB=y
+
+#
+# Ethernet (1000 Mbit)
+#
+
+#
+# Ethernet (10000 Mbit)
+#
+
+#
+# Token Ring devices
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+CONFIG_PPP=m
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+CONFIG_PPP_ASYNC=m
+# CONFIG_PPP_SYNC_TTY is not set
+CONFIG_PPP_DEFLATE=m
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_AT91=y
+CONFIG_SERIAL_AT91_CONSOLE=y
+# CONFIG_SERIAL_AT91_TTYAT is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# SPI support
+#
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_ATMEL=m
+# CONFIG_SPI_BITBANG is not set
+
+#
+# SPI Protocol Masters
+#
+
+#
+# Dallas's 1-wire bus
+#
+
+#
+# Hardware Monitoring support
+#
+# CONFIG_HWMON is not set
+# CONFIG_HWMON_VID is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FIRMWARE_EDID is not set
+CONFIG_FB=m
+CONFIG_FB_CFB_FILLRECT=m
+CONFIG_FB_CFB_COPYAREA=m
+CONFIG_FB_CFB_IMAGEBLIT=m
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+CONFIG_FB_SIDSA=m
+CONFIG_FB_SIDSA_DEFAULT_BPP=24
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Logo configuration
+#
+# CONFIG_LOGO is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+CONFIG_LCD_CLASS_DEVICE=m
+CONFIG_LCD_DEVICE=y
+CONFIG_LCD_LTV350QV=m
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB_ARCH_HAS_HCD is not set
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
+#
+# InfiniBand support
+#
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+
+#
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+CONFIG_CONFIGFS_FS=m
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_WEAK_PW_HASH is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=m
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+CONFIG_NLS_CODEPAGE_850=m
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=m
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=m
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_RWSEMS is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_KPROBES=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=m
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=m
+CONFIG_ZLIB_DEFLATE=m
diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile
new file mode 100644
index 000000000000..90e5afff54a2
--- /dev/null
+++ b/arch/avr32/kernel/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for the Linux/AVR32 kernel.
+#
+
+extra-y := head.o vmlinux.lds
+
+obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o
+obj-y += syscall_table.o syscall-stubs.o irq.o
+obj-y += setup.o traps.o semaphore.o ptrace.o
+obj-y += signal.o sys_avr32.o process.o time.o
+obj-y += init_task.o switch_to.o cpu.o
+obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+USE_STANDARD_AS_RULE := true
+
+%.lds: %.lds.c FORCE
+ $(call if_changed_dep,cpp_lds_S)
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
new file mode 100644
index 000000000000..97d865865667
--- /dev/null
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -0,0 +1,25 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/thread_info.h>
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+#define OFFSET(sym, str, mem) \
+ DEFINE(sym, offsetof(struct str, mem));
+
+/*
+ * Emit one "->SYMBOL value" marker per struct thread_info member listed
+ * below, using OFFSET()/DEFINE(). As the header comment of this file
+ * says, the resulting raw asm output is post-processed to extract the
+ * offsets. The markers appear in the order of the lines below.
+ */
+void foo(void)
+{
+ OFFSET(TI_task, thread_info, task);
+ OFFSET(TI_exec_domain, thread_info, exec_domain);
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_cpu, thread_info, cpu);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+ OFFSET(TI_restart_block, thread_info, restart_block);
+}
diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
new file mode 100644
index 000000000000..04f767a272b7
--- /dev/null
+++ b/arch/avr32/kernel/avr32_ksyms.c
@@ -0,0 +1,55 @@
+/*
+ * Export AVR32-specific functions for loadable modules.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+
+/*
+ * GCC functions
+ */
+extern unsigned long long __avr32_lsl64(unsigned long long u, unsigned long b);
+extern unsigned long long __avr32_lsr64(unsigned long long u, unsigned long b);
+extern unsigned long long __avr32_asr64(unsigned long long u, unsigned long b);
+EXPORT_SYMBOL(__avr32_lsl64);
+EXPORT_SYMBOL(__avr32_lsr64);
+EXPORT_SYMBOL(__avr32_asr64);
+
+/*
+ * String functions
+ */
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+
+/*
+ * Userspace access stuff.
+ */
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_generic);
+
+/* Delay loops (lib/delay.S) */
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__const_udelay);
+
+/* Bit operations (lib/findbit.S) */
+EXPORT_SYMBOL(find_first_zero_bit);
+EXPORT_SYMBOL(find_next_zero_bit);
+EXPORT_SYMBOL(find_first_bit);
+EXPORT_SYMBOL(find_next_bit);
+EXPORT_SYMBOL(generic_find_next_zero_le_bit);
diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c
new file mode 100644
index 000000000000..342452ba2049
--- /dev/null
+++ b/arch/avr32/kernel/cpu.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/sysdev.h>
+#include <linux/seq_file.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/param.h>
+#include <linux/errno.h>
+
+#include <asm/setup.h>
+#include <asm/sysreg.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+#ifdef CONFIG_PERFORMANCE_COUNTERS
+
+/*
+ * XXX: If/when a SMP-capable implementation of AVR32 will ever be
+ * made, we must make sure that the code executes on the correct CPU.
+ */
+/* sysfs show: print the 6-bit field at PCCR bits 17:12 as a hex number. */
+static ssize_t show_pc0event(struct sys_device *dev, char *buf)
+{
+	unsigned long pccr = sysreg_read(PCCR);
+	unsigned long event = (pccr >> 12) & 0x3f;
+
+	return sprintf(buf, "0x%lx\n", event);
+}
+/*
+ * sysfs store: parse a number (base auto-detected by simple_strtoul) and
+ * place it in PCCR bits 17:12, preserving every other PCCR bit.
+ * Returns count on success, -EINVAL if nothing was parsed or the value
+ * does not fit in six bits.
+ */
+static ssize_t store_pc0event(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long event, pccr;
+	char *end;
+
+	event = simple_strtoul(buf, &end, 0);
+	if (end == buf || event > 0x3f)
+		return -EINVAL;
+
+	pccr = sysreg_read(PCCR) & 0xfffc0fff;
+	sysreg_write(PCCR, (event << 12) | pccr);
+	return count;
+}
+/* sysfs show: print the current PCNT0 value in decimal. */
+static ssize_t show_pc0count(struct sys_device *dev, char *buf)
+{
+	unsigned long counter = sysreg_read(PCNT0);
+
+	return sprintf(buf, "%lu\n", counter);
+}
+/*
+ * sysfs store: parse a number and write it to PCNT0.
+ * Returns count on success, -EINVAL if no digits could be parsed.
+ */
+static ssize_t store_pc0count(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	char *end;
+	unsigned long counter = simple_strtoul(buf, &end, 0);
+
+	if (end == buf)
+		return -EINVAL;
+
+	sysreg_write(PCNT0, counter);
+	return count;
+}
+
+/* sysfs show: print the 6-bit field at PCCR bits 23:18 as a hex number. */
+static ssize_t show_pc1event(struct sys_device *dev, char *buf)
+{
+	unsigned long pccr = sysreg_read(PCCR);
+	unsigned long event = (pccr >> 18) & 0x3f;
+
+	return sprintf(buf, "0x%lx\n", event);
+}
+/*
+ * sysfs store: parse a number (base auto-detected by simple_strtoul) and
+ * place it in PCCR bits 23:18, preserving every other PCCR bit.
+ * Returns count on success, -EINVAL if nothing was parsed or the value
+ * does not fit in six bits.
+ */
+static ssize_t store_pc1event(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long event, pccr;
+	char *end;
+
+	event = simple_strtoul(buf, &end, 0);
+	if (end == buf || event > 0x3f)
+		return -EINVAL;
+
+	pccr = sysreg_read(PCCR) & 0xff03ffff;
+	sysreg_write(PCCR, (event << 18) | pccr);
+	return count;
+}
+/* sysfs show: print the current PCNT1 value in decimal. */
+static ssize_t show_pc1count(struct sys_device *dev, char *buf)
+{
+	unsigned long counter = sysreg_read(PCNT1);
+
+	return sprintf(buf, "%lu\n", counter);
+}
+/*
+ * sysfs store: parse a number and write it to PCNT1.
+ * Returns count on success, -EINVAL if no digits could be parsed.
+ */
+static ssize_t store_pc1count(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	char *end;
+	unsigned long counter = simple_strtoul(buf, &end, 0);
+
+	if (end == buf)
+		return -EINVAL;
+
+	sysreg_write(PCNT1, counter);
+	return count;
+}
+
+/* sysfs show: print the current PCCNT value in decimal. */
+static ssize_t show_pccycles(struct sys_device *dev, char *buf)
+{
+	unsigned long cycles = sysreg_read(PCCNT);
+
+	return sprintf(buf, "%lu\n", cycles);
+}
+/*
+ * sysfs store: parse a number and write it to PCCNT.
+ * Returns count on success, -EINVAL if no digits could be parsed.
+ */
+static ssize_t store_pccycles(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	char *end;
+	unsigned long cycles = simple_strtoul(buf, &end, 0);
+
+	if (end == buf)
+		return -EINVAL;
+
+	sysreg_write(PCCNT, cycles);
+	return count;
+}
+
+/* sysfs show: print '1' if PCCR bit 0 is set, '0' otherwise. */
+static ssize_t show_pcenable(struct sys_device *dev, char *buf)
+{
+	unsigned long pccr = sysreg_read(PCCR);
+	char state = '0';
+
+	if (pccr & 1)
+		state = '1';
+
+	return sprintf(buf, "%c\n", state);
+}
+/*
+ * sysfs store: parse a number; any non-zero value sets PCCR bit 0, zero
+ * clears it. All other PCCR bits are preserved.
+ * Returns count on success, -EINVAL if no digits could be parsed.
+ */
+static ssize_t store_pcenable(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long pccr, requested;
+	char *end;
+
+	requested = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+
+	pccr = sysreg_read(PCCR) & ~1UL;
+	if (requested)
+		pccr |= 1;
+	sysreg_write(PCCR, pccr);
+
+	return count;
+}
+
+static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event);
+static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count);
+static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event);
+static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count);
+static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles);
+static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable);
+
+#endif /* CONFIG_PERFORMANCE_COUNTERS */
+
+/*
+ * Register a cpu device for every possible CPU and, when performance
+ * counters are configured, create the counter attribute files on it.
+ * Return values of register_cpu() and sysdev_create_file() are not
+ * checked; the function always returns 0.
+ */
+static int __init topology_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu *c = &per_cpu(cpu_devices, cpu);
+#ifdef CONFIG_PERFORMANCE_COUNTERS
+		/* Same creation order as the attribute definitions above. */
+		struct sysdev_attribute *perf_attrs[] = {
+			&attr_pc0event,
+			&attr_pc0count,
+			&attr_pc1event,
+			&attr_pc1count,
+			&attr_pccycles,
+			&attr_pcenable,
+		};
+		unsigned int i;
+#endif
+
+		register_cpu(c, cpu);
+
+#ifdef CONFIG_PERFORMANCE_COUNTERS
+		for (i = 0; i < ARRAY_SIZE(perf_attrs); i++)
+			sysdev_create_file(&c->sysdev, perf_attrs[i]);
+#endif
+	}
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
+
+static const char *cpu_names[] = {
+ "Morgan",
+ "AP7000",
+};
+#define NR_CPU_NAMES ARRAY_SIZE(cpu_names)
+
+static const char *arch_names[] = {
+ "AVR32A",
+ "AVR32B",
+};
+#define NR_ARCH_NAMES ARRAY_SIZE(arch_names)
+
+static const char *mmu_types[] = {
+ "No MMU",
+ "ITLB and DTLB",
+ "Shared TLB",
+ "MPU"
+};
+
+/*
+ * Decode the CONFIG0 and CONFIG1 system registers into boot_cpu_data and
+ * log a one-time description of the CPU.
+ *
+ * CONFIG0 supplies the CPU id/revision, architecture id/revision and MMU
+ * type. CONFIG1 supplies the cache geometry; a cache whose line-size field
+ * is zero is left untouched in boot_cpu_data.
+ *
+ * If the CPU or architecture id has no entry in the name tables, only an
+ * "unknown configuration" message is printed and the feature summary is
+ * skipped.
+ */
+void __init setup_processor(void)
+{
+	unsigned long config0 = sysreg_read(CONFIG0);	/* 0x0000013e; */
+	unsigned long config1 = sysreg_read(CONFIG1);	/* 0x01f689a2; */
+	unsigned cpu_id = config0 >> 24;
+	unsigned cpu_rev = (config0 >> 16) & 0xff;
+	unsigned arch_id = (config0 >> 13) & 0x07;
+	unsigned arch_rev = (config0 >> 10) & 0x07;
+	unsigned mmu_type = (config0 >> 7) & 0x03;
+	unsigned linesz_code;
+
+	boot_cpu_data.cpu_type = cpu_id;
+	boot_cpu_data.cpu_revision = cpu_rev;
+	boot_cpu_data.arch_type = arch_id;
+	boot_cpu_data.arch_revision = arch_rev;
+	boot_cpu_data.tlb_config = mmu_type;
+
+	/* Instruction cache: all three fields are log2-encoded. */
+	linesz_code = (config1 >> 13) & 0x07;
+	if (linesz_code != 0) {
+		boot_cpu_data.icache.ways = 1 << ((config1 >> 10) & 0x07);
+		boot_cpu_data.icache.sets = 1 << ((config1 >> 16) & 0x0f);
+		boot_cpu_data.icache.linesz = 1 << (linesz_code + 1);
+	}
+
+	/* Data cache: same encoding, fields in the low bits of CONFIG1. */
+	linesz_code = (config1 >> 3) & 0x07;
+	if (linesz_code != 0) {
+		boot_cpu_data.dcache.ways = 1 << (config1 & 0x07);
+		boot_cpu_data.dcache.sets = 1 << ((config1 >> 6) & 0x0f);
+		boot_cpu_data.dcache.linesz = 1 << (linesz_code + 1);
+	}
+
+	if (cpu_id >= NR_CPU_NAMES || arch_id >= NR_ARCH_NAMES) {
+		printk ("Unknown CPU configuration (ID %02x, arch %02x), "
+			"continuing anyway...\n",
+			cpu_id, arch_id);
+		return;
+	}
+
+	printk ("CPU: %s [%02x] revision %d (%s revision %d)\n",
+		cpu_names[cpu_id], cpu_id, cpu_rev,
+		arch_names[arch_id], arch_rev);
+	printk ("CPU: MMU configuration: %s\n", mmu_types[mmu_type]);
+
+	/* Optional features are flagged in CONFIG0 bits 6..3. */
+	printk ("CPU: features:");
+	if ((config0 >> 6) & 1)
+		printk(" fpu");
+	if ((config0 >> 5) & 1)
+		printk(" java");
+	if ((config0 >> 4) & 1)
+		printk(" perfctr");
+	if ((config0 >> 3) & 1)
+		printk(" ocd");
+	printk("\n");
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show callback: emit the CPU description from boot_cpu_data.
+ *
+ * The "cpu family" and "cpu type" lines are printed only when the stored
+ * id indexes a known name. Cache sizes are ways * sets * line size, shown
+ * in KiB. Always returns 0.
+ */
+static int c_show(struct seq_file *m, void *v)
+{
+	unsigned int cpu = smp_processor_id();
+	unsigned int icache_size = boot_cpu_data.icache.ways *
+				   boot_cpu_data.icache.sets *
+				   boot_cpu_data.icache.linesz;
+	unsigned int dcache_size = boot_cpu_data.dcache.ways *
+				   boot_cpu_data.dcache.sets *
+				   boot_cpu_data.dcache.linesz;
+
+	seq_printf(m, "processor\t: %d\n", cpu);
+
+	if (boot_cpu_data.arch_type < NR_ARCH_NAMES) {
+		seq_printf(m, "cpu family\t: %s revision %d\n",
+			   arch_names[boot_cpu_data.arch_type],
+			   boot_cpu_data.arch_revision);
+	}
+	if (boot_cpu_data.cpu_type < NR_CPU_NAMES) {
+		seq_printf(m, "cpu type\t: %s revision %d\n",
+			   cpu_names[boot_cpu_data.cpu_type],
+			   boot_cpu_data.cpu_revision);
+	}
+
+	seq_printf(m, "i-cache\t\t: %dK (%u ways x %u sets x %u)\n",
+		   icache_size >> 10,
+		   boot_cpu_data.icache.ways,
+		   boot_cpu_data.icache.sets,
+		   boot_cpu_data.icache.linesz);
+	seq_printf(m, "d-cache\t\t: %dK (%u ways x %u sets x %u)\n",
+		   dcache_size >> 10,
+		   boot_cpu_data.dcache.ways,
+		   boot_cpu_data.dcache.sets,
+		   boot_cpu_data.dcache.linesz);
+
+	/* Integer part, then two decimals, derived from loops_per_jiffy. */
+	seq_printf(m, "bogomips\t: %lu.%02lu\n",
+		   boot_cpu_data.loops_per_jiffy / (500000/HZ),
+		   (boot_cpu_data.loops_per_jiffy / (5000/HZ)) % 100);
+
+	return 0;
+}
+
+/*
+ * seq_file start callback: there is exactly one record, at position 0.
+ * Returns a non-NULL dummy token for it and NULL for any later position.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos < 1)
+		return (void *)1;
+
+	return NULL;
+}
+
+/* seq_file next callback: advance the position and report end of sequence. */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+
+	return NULL;
+}
+
+/* seq_file stop callback: intentionally empty, c_start() acquires nothing. */
+static void c_stop(struct seq_file *m, void *v)
+{
+
+}
+
+/*
+ * seq_file operations for the single-record CPU description produced by
+ * c_show(). Non-static, so it is referenced from outside this file --
+ * presumably by the generic /proc/cpuinfo code; confirm against the caller.
+ */
+struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = c_show
+};
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
new file mode 100644
index 000000000000..eeb66792bc37
--- /dev/null
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This file contains the low-level entry-points into the kernel, that is,
+ * exception handlers, debug trap handlers, interrupt handlers and the
+ * system call handler.
+ */
+#include <linux/errno.h>
+
+#include <asm/asm.h>
+#include <asm/hardirq.h>
+#include <asm/irq.h>
+#include <asm/ocd.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#ifdef CONFIG_PREEMPT
+# define preempt_stop mask_interrupts
+#else
+# define preempt_stop
+# define fault_resume_kernel fault_restore_all
+#endif
+
+#define __MASK(x) ((1 << (x)) - 1)
+#define IRQ_MASK ((__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | \
+ (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT))
+
+ .section .ex.text,"ax",@progbits
+ .align 2
+ /*
+ * Exception vector table: one `bral` per slot, each slot re-aligned
+ * with `.align 2`. Several slots share a handler (handle_critical,
+ * do_illegal_opcode_ll, handle_address_fault, handle_protection_fault).
+ * NOTE(review): which exception cause maps to which slot is defined by
+ * the architecture, not by this file -- confirm against the AVR32
+ * manual before reordering or inserting entries.
+ */
+exception_vectors:
+ bral handle_critical
+ .align 2
+ bral handle_critical
+ .align 2
+ bral do_bus_error_write
+ .align 2
+ bral do_bus_error_read
+ .align 2
+ bral do_nmi_ll
+ .align 2
+ bral handle_address_fault
+ .align 2
+ bral handle_protection_fault
+ .align 2
+ bral handle_debug
+ .align 2
+ bral do_illegal_opcode_ll
+ .align 2
+ bral do_illegal_opcode_ll
+ .align 2
+ bral do_illegal_opcode_ll
+ .align 2
+ bral do_fpe_ll
+ .align 2
+ bral do_illegal_opcode_ll
+ .align 2
+ bral handle_address_fault
+ .align 2
+ bral handle_address_fault
+ .align 2
+ bral handle_protection_fault
+ .align 2
+ bral handle_protection_fault
+ .align 2
+ bral do_dtlb_modified
+
+ /*
+ * r0 : PGD/PT/PTE
+ * r1 : Offending address
+ * r2 : Scratch register
+ * r3 : Cause (5, 12 or 13)
+ */
+#define tlbmiss_save pushm r0-r3
+#define tlbmiss_restore popm r0-r3
+
+ /*
+ * TLB miss entry stubs. Each one is placed in its own section, saves
+ * r0-r3 (tlbmiss_save) and continues in tlb_miss_common. The ITLB and
+ * DTLB-read stubs jump there; the DTLB-write stub falls through,
+ * because tlb_miss_common follows it directly in the same section.
+ */
+ .section .tlbx.ex.text,"ax",@progbits
+ .global itlb_miss
+itlb_miss:
+ tlbmiss_save
+ rjmp tlb_miss_common
+
+ .section .tlbr.ex.text,"ax",@progbits
+dtlb_miss_read:
+ tlbmiss_save
+ rjmp tlb_miss_common
+
+ .section .tlbw.ex.text,"ax",@progbits
+dtlb_miss_write:
+ tlbmiss_save
+
+ /*
+ * TLB miss fast path, entered with r0-r3 already saved by a stub.
+ *
+ * Walks a two-level page table for the faulting address (TLBEAR). The
+ * first-level table comes from PTBR, unless bit 31 of the address is
+ * set, in which case handle_vmalloc_miss substitutes swapper_pg_dir.
+ * If either level lacks _PAGE_BIT_PRESENT, control goes to the slow
+ * path (page_table_not_present / page_not_present). Otherwise the PTE
+ * is marked accessed in memory, its hardware flags are written to
+ * TLBELO, a victim entry is selected using TLBARLO and merged into
+ * MMUCR, and the entry is written with tlbw before returning with rete.
+ */
+ .global tlb_miss_common
+tlb_miss_common:
+ mfsr r0, SYSREG_PTBR
+ mfsr r1, SYSREG_TLBEAR
+
+ /* Is it the vmalloc space? */
+ bld r1, 31
+ brcs handle_vmalloc_miss
+
+ /* First level lookup */
+pgtbl_lookup:
+ lsr r2, r1, PGDIR_SHIFT
+ ld.w r0, r0[r2 << 2]
+ bld r0, _PAGE_BIT_PRESENT
+ brcc page_table_not_present
+
+ /* TODO: Check access rights on page table if necessary */
+
+ /* Translate to virtual address in P1. */
+ andl r0, 0xf000
+ sbr r0, 31
+
+ /* Second level lookup */
+ lsl r1, (32 - PGDIR_SHIFT)
+ lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
+ add r2, r0, r1 << 2
+ ld.w r1, r2[0]
+ bld r1, _PAGE_BIT_PRESENT
+ brcc page_not_present
+
+ /* Mark the page as accessed */
+ sbr r1, _PAGE_BIT_ACCESSED
+ st.w r2[0], r1
+
+ /* Drop software flags */
+ andl r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff
+ mtsr SYSREG_TLBELO, r1
+
+ /* Figure out which entry we want to replace */
+ mfsr r0, SYSREG_TLBARLO
+ clz r2, r0
+ brcc 1f
+ mov r1, -1 /* All entries have been accessed, */
+ mtsr SYSREG_TLBARLO, r1 /* so reset TLBAR */
+ mov r2, 0 /* and start at 0 */
+ /* Merge the chosen index (shifted to bit 14) into MMUCR */
+1: mfsr r1, SYSREG_MMUCR
+ lsl r2, 14
+ andl r1, 0x3fff, COH
+ or r1, r2
+ mtsr SYSREG_MMUCR, r1
+
+ tlbw
+
+ tlbmiss_restore
+ rete
+
+ /*
+ * Reached from tlb_miss_common when bit 31 of the faulting address is
+ * set: replace the page-table base in r0 with swapper_pg_dir (loaded
+ * as low half, then high half) and rejoin the normal lookup.
+ */
+handle_vmalloc_miss:
+ /* Simply do the lookup in init's page table */
+ mov r0, lo(swapper_pg_dir)
+ orh r0, hi(swapper_pg_dir)
+ rjmp pgtbl_lookup
+
+
+ /* --- System Call --- */
+
+ .section .scall.text,"ax",@progbits
+ /*
+ * System call entry. Builds the register frame on the stack: r12 (the
+ * r12_orig slot), r0-lr, then RAR_SUP and RSR_SUP. If
+ * TIF_SYSCALL_TRACE is set, detours through syscall_trace_enter. The
+ * syscall number is taken from r8; numbers >= NR_syscalls go to
+ * syscall_badsys. Otherwise the handler address is fetched from
+ * sys_call_table, r5 is copied into r8 as the fifth argument, and the
+ * handler is called. Execution then falls through into syscall_return.
+ */
+system_call:
+ pushm r12 /* r12_orig */
+ stmts --sp, r0-lr
+ zero_fp
+ mfsr r0, SYSREG_RAR_SUP
+ mfsr r1, SYSREG_RSR_SUP
+ stm --sp, r0-r1
+
+ /* check for syscall tracing */
+ get_thread_info r0
+ ld.w r1, r0[TI_flags]
+ bld r1, TIF_SYSCALL_TRACE
+ brcs syscall_trace_enter
+
+syscall_trace_cont:
+ cp.w r8, NR_syscalls
+ brhs syscall_badsys
+
+ lddpc lr, syscall_table_addr
+ ld.w lr, lr[r8 << 2]
+ mov r8, r5 /* 5th argument (6th is pushed by stub) */
+ icall lr
+
+
+ /*
+ * Common system call exit. With interrupts masked, stores the return
+ * value (r12) into the saved frame, then samples the thread flags: if
+ * any _TIF_ALLWORK_MASK bit is set it branches to syscall_exit_work,
+ * otherwise it falls through into syscall_exit_cont.
+ */
+ .global syscall_return
+syscall_return:
+ get_thread_info r0
+ mask_interrupts /* make sure we don't miss an interrupt
+ setting need_resched or sigpending
+ between sampling and the rets */
+
+ /* Store the return value so that the correct value is loaded below */
+ stdsp sp[REG_R12], r12
+
+ ld.w r1, r0[TI_flags]
+ andl r1, _TIF_ALLWORK_MASK, COH
+ brne syscall_exit_work
+
+
+ /*
+ * Final stage of system call return: pop the saved PC/SR pair back
+ * into RAR_SUP/RSR_SUP, reload r0-lr from the frame, skip the
+ * r12_orig slot and return with rets. The literal that follows holds
+ * the address of sys_call_table for the lddpc in system_call.
+ */
+syscall_exit_cont:
+ popm r8-r9
+ mtsr SYSREG_RAR_SUP, r8
+ mtsr SYSREG_RSR_SUP, r9
+ ldmts sp++, r0-lr
+ sub sp, -4 /* r12_orig */
+ rets
+
+ .align 2
+syscall_table_addr:
+ .long sys_call_table
+
+ /* Syscall number out of range: return -ENOSYS via the normal exit path. */
+syscall_badsys:
+ mov r12, -ENOSYS
+ rjmp syscall_return
+
+ /*
+ * First code run by a newly forked task: calls schedule_tail, then
+ * leaves through the system call exit path.
+ * NOTE(review): unlike syscall_return, the flags are sampled here
+ * without a preceding mask_interrupts -- confirm whether interrupts
+ * are already masked at this point.
+ */
+ .global ret_from_fork
+ret_from_fork:
+ rcall schedule_tail
+
+ /* check for pending work (any flag in _TIF_ALLWORK_MASK) */
+ get_thread_info r0
+ ld.w r1, r0[TI_flags]
+ andl r1, _TIF_ALLWORK_MASK, COH
+ brne syscall_exit_work
+ rjmp syscall_exit_cont
+
+ /*
+ * Syscall-entry tracing hook: call syscall_trace with r8-r12 saved
+ * around the call, then resume the dispatch at syscall_trace_cont.
+ */
+syscall_trace_enter:
+ pushm r8-r12
+ rcall syscall_trace
+ popm r8-r12
+ rjmp syscall_trace_cont
+
+ /*
+ * Slow path of system call return. Entered with r0 = thread_info and
+ * r1 = thread flags. Handles, in order:
+ *   - TIF_SYSCALL_TRACE: call syscall_trace once;
+ *   - TIF_NEED_RESCHED: call schedule, then re-check from label 1;
+ *   - _TIF_SIGPENDING / _TIF_RESTORE_SIGMASK: call
+ *     do_notify_resume(sp, thread_info), then re-check from label 1;
+ *   - TIF_BREAKPOINT: program the BWA2A/BWC2A debug registers.
+ * Interrupts are unmasked around each C call and masked again before
+ * the flags are reloaded. Ends at syscall_exit_cont.
+ */
+syscall_exit_work:
+ bld r1, TIF_SYSCALL_TRACE
+ brcc 1f
+ unmask_interrupts
+ rcall syscall_trace
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+
+1: bld r1, TIF_NEED_RESCHED
+ brcc 2f
+ unmask_interrupts
+ rcall schedule
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+ rjmp 1b
+
+2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+ tst r1, r2
+ breq 3f
+ unmask_interrupts
+ mov r12, sp
+ mov r11, r0
+ rcall do_notify_resume
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+ rjmp 1b
+
+ /*
+ * BWA2A gets the saved PC; BWC2A gets a control word built from the
+ * low 8 bits of TLBEHI shifted left by one, with bits 30 and 0 set.
+ * NOTE(review): presumably arms a breakpoint at the return address
+ * for the current ASID -- confirm against the OCD documentation.
+ */
+3: bld r1, TIF_BREAKPOINT
+ brcc syscall_exit_cont
+ mfsr r3, SYSREG_TLBEHI
+ lddsp r2, sp[REG_PC]
+ andl r3, 0xff, COH
+ lsl r3, 1
+ sbr r3, 30
+ sbr r3, 0
+ mtdr DBGREG_BWA2A, r2
+ mtdr DBGREG_BWC2A, r3
+ rjmp syscall_exit_cont
+
+
+ /*
+ * The slow path of the TLB miss handler. Both "page table not present"
+ * and "page not present" land here: undo the r0-r3 save made by the
+ * miss stub, build a full register frame (r12_orig slot, r0-lr, then
+ * PC/SR via save_full_context_ex) and call do_page_fault with r12 =
+ * ECR and r11 = pointer to the frame.
+ */
+page_table_not_present:
+page_not_present:
+ tlbmiss_restore
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ mfsr r12, SYSREG_ECR
+ mov r11, sp
+ rcall do_page_fault
+ rjmp ret_from_exception
+
+ /*
+ * This function expects to find offending PC in SYSREG_RAR_EX.
+ *
+ * Completes an exception frame whose r0-lr part the caller has already
+ * pushed: pushes RAR_EX (PC) and RSR_EX (SR), unmasks exceptions and
+ * returns to the caller. When the mode bits of RSR_EX are non-zero
+ * (the case ret_from_exception treats as a return to the kernel), the
+ * saved-SP slot is first overwritten with
+ * sp + (FRAME_SIZE_FULL - REG_LR).
+ * Clobbers r8, r10, r11 and r12.
+ */
+save_full_context_ex:
+ mfsr r8, SYSREG_RSR_EX
+ mov r12, r8
+ andh r8, (MODE_MASK >> 16), COH
+ mfsr r11, SYSREG_RAR_EX
+ brne 2f
+
+1: pushm r11, r12 /* PC and SR */
+ unmask_exceptions
+ ret r12
+
+2: sub r10, sp, -(FRAME_SIZE_FULL - REG_LR)
+ stdsp sp[4], r10 /* replace saved SP */
+ rjmp 1b
+
+ /* Low-level exception handlers */
+
+ /*
+ * Critical exception: save r12 and r0-r12, complete the frame with
+ * save_full_context_ex and call do_critical_exception with r12 = ECR
+ * and r11 = sp. The C handler is not expected to return; if it does,
+ * execution falls into bad_return, which calls panic with the message
+ * string stored right after the branch.
+ * NOTE(review): this entry saves registers with two pushm instructions
+ * where the other handlers use "sub sp, 4; stmts --sp, r0-lr" --
+ * confirm the resulting frame layout is what the C code expects.
+ */
+handle_critical:
+ pushm r12
+ pushm r0-r12
+ rcall save_full_context_ex
+ mfsr r12, SYSREG_ECR
+ mov r11, sp
+ rcall do_critical_exception
+
+ /* We should never get here... */
+bad_return:
+ sub r12, pc, (. - 1f)
+ bral panic
+ .align 2
+1: .asciz "Return from critical exception!"
+
+ .align 1
+ /*
+ * Bus error handlers. Both build a full exception frame and call
+ * do_bus_error with r12 = BEAR, r11 = 1 for a write / 0 for a read,
+ * and r10 = pointer to the frame. The write variant shares the tail
+ * of the read variant through the local label 1.
+ */
+do_bus_error_write:
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ mov r11, 1
+ rjmp 1f
+
+do_bus_error_read:
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ mov r11, 0
+1: mfsr r12, SYSREG_BEAR
+ mov r10, sp
+ rcall do_bus_error
+ rjmp ret_from_exception
+
+ .align 1
+ /*
+ * NMI handler: build an exception frame and call do_nmi with r12 = ECR
+ * and r11 = pointer to the frame. A return from do_nmi is treated as
+ * fatal (jumps to bad_return, which panics).
+ */
+do_nmi_ll:
+ sub sp, 4
+ stmts --sp, r0-lr
+ /* FIXME: Make sure RAR_NMI and RSR_NMI are pushed instead of *_EX */
+ rcall save_full_context_ex
+ mfsr r12, SYSREG_ECR
+ mov r11, sp
+ rcall do_nmi
+ rjmp bad_return
+
+ /*
+ * Address fault: build a full exception frame and call
+ * do_address_exception with r12 = ECR and r11 = pointer to the frame,
+ * then leave through ret_from_exception.
+ */
+handle_address_fault:
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ mfsr r12, SYSREG_ECR
+ mov r11, sp
+ rcall do_address_exception
+ rjmp ret_from_exception
+
+ /*
+ * Protection fault: build a full exception frame and call
+ * do_page_fault with r12 = ECR and r11 = pointer to the frame (the
+ * same C entry point the TLB miss slow path uses), then leave through
+ * ret_from_exception.
+ */
+handle_protection_fault:
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ mfsr r12, SYSREG_ECR
+ mov r11, sp
+ rcall do_page_fault
+ rjmp ret_from_exception
+
+ .align 1
+ /*
+ * Shared by several vector slots: build a full exception frame and
+ * call do_illegal_opcode with r12 = ECR (so the C code can tell the
+ * causes apart) and r11 = pointer to the frame.
+ */
+do_illegal_opcode_ll:
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ mfsr r12, SYSREG_ECR
+ mov r11, sp
+ rcall do_illegal_opcode
+ rjmp ret_from_exception
+
+ /*
+ * DTLB "modified" exception: locate the PTE for the address in TLBEAR
+ * with the same two-level walk as tlb_miss_common, set
+ * _PAGE_BIT_DIRTY in it, store it back and refresh the TLB entry.
+ * Unlike tlb_miss_common, this path always starts from PTBR (no
+ * swapper_pg_dir special case) and does not test the present bits.
+ * NOTE(review): presumably safe because the translation was just used
+ * successfully -- confirm for addresses with bit 31 set.
+ */
+do_dtlb_modified:
+ pushm r0-r3
+ mfsr r1, SYSREG_TLBEAR
+ mfsr r0, SYSREG_PTBR
+ lsr r2, r1, PGDIR_SHIFT
+ ld.w r0, r0[r2 << 2]
+ lsl r1, (32 - PGDIR_SHIFT)
+ lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
+
+ /* Translate to virtual address in P1 */
+ andl r0, 0xf000
+ sbr r0, 31
+ add r2, r0, r1 << 2
+ ld.w r3, r2[0]
+ sbr r3, _PAGE_BIT_DIRTY
+ mov r0, r3
+ st.w r2[0], r3
+
+ /* The page table is up-to-date. Update the TLB entry as well */
+ andl r0, lo(_PAGE_FLAGS_HARDWARE_MASK)
+ mtsr SYSREG_TLBELO, r0
+
+ /* MMUCR[DRP] is updated automatically, so let's go... */
+ tlbw
+
+ popm r0-r3
+ rete
+
+ /*
+ * Floating-point exception: build a full exception frame, unmask
+ * interrupts and call do_fpe with r12 = 26 and r11 = pointer to the
+ * frame.
+ * NOTE(review): the other handlers pass ECR in r12; the constant 26 is
+ * presumably the ECR value for this exception -- confirm.
+ */
+do_fpe_ll:
+ sub sp, 4
+ stmts --sp, r0-lr
+ rcall save_full_context_ex
+ unmask_interrupts
+ mov r12, 26
+ mov r11, sp
+ rcall do_fpe
+ rjmp ret_from_exception
+
+ /*
+ * Common exception exit. With interrupts masked, inspects the mode
+ * bits of the saved SR: non-zero means return to the kernel
+ * (fault_resume_kernel). Otherwise any flag in _TIF_WORK_MASK sends
+ * control to fault_exit_work, and with no work pending execution falls
+ * into fault_resume_user, which reloads RAR_EX/RSR_EX and r0-lr from
+ * the frame, skips the r12_orig slot and returns with rete.
+ */
+ret_from_exception:
+ mask_interrupts
+ lddsp r4, sp[REG_SR]
+ andh r4, (MODE_MASK >> 16), COH
+ brne fault_resume_kernel
+
+ get_thread_info r0
+ ld.w r1, r0[TI_flags]
+ andl r1, _TIF_WORK_MASK, COH
+ brne fault_exit_work
+
+fault_resume_user:
+ popm r8-r9
+ mask_exceptions
+ mtsr SYSREG_RAR_EX, r8
+ mtsr SYSREG_RSR_EX, r9
+ ldmts sp++, r0-lr
+ sub sp, -4
+ rete
+
+ /*
+ * Return from an exception taken in a privileged mode. With
+ * CONFIG_PREEMPT, preempt_schedule_irq is called first when the
+ * preempt count is zero, TIF_NEED_RESCHED is set and the GM bit of
+ * the saved SR is clear. The frame is then unwound by hand: PC/SR go
+ * to RAR_EX/RSR_EX, lr is popped, the saved-SP and r12_orig slots are
+ * skipped, r0-r12 are popped, and rete returns.
+ */
+fault_resume_kernel:
+#ifdef CONFIG_PREEMPT
+ get_thread_info r0
+ ld.w r2, r0[TI_preempt_count]
+ cp.w r2, 0
+ brne 1f
+ ld.w r1, r0[TI_flags]
+ bld r1, TIF_NEED_RESCHED
+ brcc 1f
+ lddsp r4, sp[REG_SR]
+ bld r4, SYSREG_GM_OFFSET
+ brcs 1f
+ rcall preempt_schedule_irq
+1:
+#endif
+
+ popm r8-r9
+ mask_exceptions
+ mfsr r1, SYSREG_SR
+ mtsr SYSREG_RAR_EX, r8
+ mtsr SYSREG_RSR_EX, r9
+ popm lr
+ sub sp, -4 /* ignore SP */
+ popm r0-r12
+ sub sp, -4 /* ignore r12_orig */
+ rete
+
+ /*
+ * Pending-work handling on return to user mode from an interrupt
+ * (irq_exit_work) or an exception (fault_exit_work).
+ *
+ * irq_exit_work first rewrites the mode bits in SR (clear M0, set M1
+ * and M2) so the exception return code can be shared, reloads
+ * r0 = thread_info and r1 = flags, and falls into fault_exit_work.
+ *
+ * fault_exit_work expects r0 = thread_info and r1 = flags, and handles:
+ *   - TIF_NEED_RESCHED: call schedule, then re-check;
+ *   - _TIF_SIGPENDING / _TIF_RESTORE_SIGMASK: call
+ *     do_notify_resume(sp, thread_info), then re-check;
+ *   - TIF_BREAKPOINT: program BWA2A/BWC2A exactly as
+ *     syscall_exit_work does.
+ * Interrupts are unmasked around each C call. Ends at
+ * fault_resume_user.
+ */
+irq_exit_work:
+ /* Switch to exception mode so that we can share the same code. */
+ mfsr r8, SYSREG_SR
+ cbr r8, SYSREG_M0_OFFSET
+ orh r8, hi(SYSREG_BIT(M1) | SYSREG_BIT(M2))
+ mtsr SYSREG_SR, r8
+ sub pc, -2
+ get_thread_info r0
+ ld.w r1, r0[TI_flags]
+
+fault_exit_work:
+ bld r1, TIF_NEED_RESCHED
+ brcc 1f
+ unmask_interrupts
+ rcall schedule
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+ rjmp fault_exit_work
+
+1: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+ tst r1, r2
+ breq 2f
+ unmask_interrupts
+ mov r12, sp
+ mov r11, r0
+ rcall do_notify_resume
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+ rjmp fault_exit_work
+
+2: bld r1, TIF_BREAKPOINT
+ brcc fault_resume_user
+ mfsr r3, SYSREG_TLBEHI
+ lddsp r2, sp[REG_PC]
+ andl r3, 0xff, COH
+ lsl r3, 1
+ sbr r3, 30
+ sbr r3, 0
+ mtdr DBGREG_BWA2A, r2
+ mtdr DBGREG_BWC2A, r3
+ rjmp fault_resume_user
+
+ /*
+ * If we get a debug trap from privileged context we end up here.
+ *
+ * Entered from handle_debug with the frame already pushed and r11
+ * holding the mode bits of the interrupted context. SR is temporarily
+ * switched to that mode to read its lr into the frame, then switched
+ * back; the saved SP slot is set to sp + FRAME_SIZE_FULL.
+ * do_debug_priv is then called with r12 = pointer to the frame. On
+ * the way out the same mode switch is used to restore lr, RAR_DBG and
+ * RSR_DBG are reloaded from the frame, and retd returns.
+ * NOTE(review): each "sub pc, -2" follows an SR write; presumably a
+ * pipeline flush so the new mode takes effect -- confirm.
+ */
+handle_debug_priv:
+ /* Fix up LR and SP in regs. r11 contains the mode we came from */
+ mfsr r8, SYSREG_SR
+ mov r9, r8
+ andh r8, hi(~MODE_MASK)
+ or r8, r11
+ mtsr SYSREG_SR, r8
+ sub pc, -2
+ stdsp sp[REG_LR], lr
+ mtsr SYSREG_SR, r9
+ sub pc, -2
+ sub r10, sp, -FRAME_SIZE_FULL
+ stdsp sp[REG_SP], r10
+ mov r12, sp
+ rcall do_debug_priv
+
+ /* Now, put everything back */
+ ssrf SR_EM_BIT
+ popm r10, r11
+ mtsr SYSREG_RAR_DBG, r10
+ mtsr SYSREG_RSR_DBG, r11
+ mfsr r8, SYSREG_SR
+ mov r9, r8
+ andh r8, hi(~MODE_MASK)
+ andh r11, hi(MODE_MASK)
+ or r8, r11
+ mtsr SYSREG_SR, r8
+ sub pc, -2
+ popm lr
+ mtsr SYSREG_SR, r9
+ sub pc, -2
+ sub sp, -4 /* skip SP */
+ popm r0-r12
+ sub sp, -4
+ retd
+
+ /*
+ * At this point, everything is masked, that is, interrupts,
+ * exceptions and debugging traps. We might get called from
+ * interrupt or exception context in some rare cases, but this
+ * will be taken care of by do_debug(), so we're not going to
+ * do a 100% correct context save here.
+ *
+ * Flow: build the frame (r12_orig slot, r0-lr, then RAR_DBG and
+ * RSR_DBG). If the mode bits of RSR_DBG are non-zero, continue in
+ * handle_debug_priv. Otherwise call do_debug with r12 = pointer to
+ * the frame; afterwards, if the saved SR shows user mode, go through
+ * debug_resume_user, else restore directly in debug_restore_all,
+ * which reloads RSR_DBG/RAR_DBG and r0-lr and returns with retd.
+ */
+handle_debug:
+ sub sp, 4 /* r12_orig */
+ stmts --sp, r0-lr
+ mfsr r10, SYSREG_RAR_DBG
+ mfsr r11, SYSREG_RSR_DBG
+ unmask_exceptions
+ pushm r10,r11
+ andh r11, (MODE_MASK >> 16), COH
+ brne handle_debug_priv
+
+ mov r12, sp
+ rcall do_debug
+
+ lddsp r10, sp[REG_SR]
+ andh r10, (MODE_MASK >> 16), COH
+ breq debug_resume_user
+
+debug_restore_all:
+ popm r10,r11
+ mask_exceptions
+ mtsr SYSREG_RSR_DBG, r11
+ mtsr SYSREG_RAR_DBG, r10
+ ldmts sp++, r0-lr
+ sub sp, -4
+ retd
+
+ /*
+ * Return to user mode after a debug trap. With interrupts masked,
+ * checks _TIF_DBGWORK_MASK; with nothing pending it goes straight to
+ * debug_restore_all. Otherwise it handles, in order:
+ *   - TIF_NEED_RESCHED: call schedule, then re-check;
+ *   - _TIF_SIGPENDING / _TIF_RESTORE_SIGMASK: call
+ *     do_notify_resume(sp, thread_info), then re-check;
+ *   - TIF_SINGLE_STEP: set DC_SS_BIT in the DC debug register.
+ */
+debug_resume_user:
+ get_thread_info r0
+ mask_interrupts
+
+ ld.w r1, r0[TI_flags]
+ andl r1, _TIF_DBGWORK_MASK, COH
+ breq debug_restore_all
+
+1: bld r1, TIF_NEED_RESCHED
+ brcc 2f
+ unmask_interrupts
+ rcall schedule
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+ rjmp 1b
+
+2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+ tst r1, r2
+ breq 3f
+ unmask_interrupts
+ mov r12, sp
+ mov r11, r0
+ rcall do_notify_resume
+ mask_interrupts
+ ld.w r1, r0[TI_flags]
+ rjmp 1b
+
+3: bld r1, TIF_SINGLE_STEP
+ brcc debug_restore_all
+ mfdr r2, DBGREG_DC
+ sbr r2, DC_SS_BIT
+ mtdr DBGREG_DC, r2
+ rjmp debug_restore_all
+
+ .set rsr_int0, SYSREG_RSR_INT0
+ .set rsr_int1, SYSREG_RSR_INT1
+ .set rsr_int2, SYSREG_RSR_INT2
+ .set rsr_int3, SYSREG_RSR_INT3
+ .set rar_int0, SYSREG_RAR_INT0
+ .set rar_int1, SYSREG_RAR_INT1
+ .set rar_int2, SYSREG_RAR_INT2
+ .set rar_int3, SYSREG_RAR_INT3
+
+ /*
+ * IRQ_LEVEL <n>: emit the entry point irq_level<n> for interrupt
+ * priority level n.
+ *
+ * The handler builds a register frame (r12_orig slot, r0-lr, then
+ * RAR_INTn/RSR_INTn) and calls do_IRQ with r12 = level and
+ * r11 = pointer to the frame. On return:
+ *   - if the saved SR shows user mode and a _TIF_WORK_MASK flag is
+ *     set, control goes to irq_exit_work;
+ *   - if it shows a privileged mode and CONFIG_PREEMPT is enabled,
+ *     preempt_schedule_irq is called when the preempt count is zero,
+ *     TIF_NEED_RESCHED is set and the saved GM bit is clear;
+ *   - otherwise (label 1) the frame is unwound and rete returns.
+ */
+ .macro IRQ_LEVEL level
+ .type irq_level\level, @function
+irq_level\level:
+ sub sp, 4 /* r12_orig */
+ stmts --sp,r0-lr
+ mfsr r8, rar_int\level
+ mfsr r9, rsr_int\level
+ pushm r8-r9
+
+ mov r11, sp
+ mov r12, \level
+
+ rcall do_IRQ
+
+ lddsp r4, sp[REG_SR]
+ andh r4, (MODE_MASK >> 16), COH
+#ifdef CONFIG_PREEMPT
+ brne 2f
+#else
+ brne 1f
+#endif
+
+ get_thread_info r0
+ ld.w r1, r0[TI_flags]
+ andl r1, _TIF_WORK_MASK, COH
+ brne irq_exit_work
+
+1: popm r8-r9
+ mtsr rar_int\level, r8
+ mtsr rsr_int\level, r9
+ ldmts sp++,r0-lr
+ sub sp, -4 /* ignore r12_orig */
+ rete
+
+#ifdef CONFIG_PREEMPT
+2:
+ get_thread_info r0
+ ld.w r2, r0[TI_preempt_count]
+ cp.w r2, 0
+ brne 1b
+ ld.w r1, r0[TI_flags]
+ bld r1, TIF_NEED_RESCHED
+ brcc 1b
+ lddsp r4, sp[REG_SR]
+ bld r4, SYSREG_GM_OFFSET
+ brcs 1b
+ rcall preempt_schedule_irq
+ rjmp 1b
+#endif
+ .endm
+
+ .section .irq.text,"ax",@progbits
+
+ .global irq_level0
+ .global irq_level1
+ .global irq_level2
+ .global irq_level3
+ IRQ_LEVEL 0
+ IRQ_LEVEL 1
+ IRQ_LEVEL 2
+ IRQ_LEVEL 3
diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S
new file mode 100644
index 000000000000..773b7ad87be9
--- /dev/null
+++ b/arch/avr32/kernel/head.S
@@ -0,0 +1,45 @@
+/*
+ * Non-board-specific low-level startup code
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/sysreg.h>
+
+ .section .init.text,"ax"
+ .global kernel_entry
+ /*
+ * Kernel entry point. Loads SR from the init_sr literal, points sp at
+ * init_thread_union + THREAD_SIZE (the top of the initial stack),
+ * optionally zeroes lr and r7 to terminate frame-pointer chains,
+ * calls board_early_init and finally jumps to start_kernel by loading
+ * its address into pc. The literals used by the lddpc instructions
+ * follow the code.
+ */
+kernel_entry:
+ /* Initialize status register */
+ lddpc r0, init_sr
+ mtsr SYSREG_SR, r0
+
+ /* Set initial stack pointer */
+ lddpc sp, stack_addr
+ sub sp, -THREAD_SIZE
+
+#ifdef CONFIG_FRAME_POINTER
+ /* Mark last stack frame */
+ mov lr, 0
+ mov r7, 0
+#endif
+
+ /* Set up the PIO, SDRAM controller, early printk, etc. */
+ rcall board_early_init
+
+ /* Start the show */
+ lddpc pc, kernel_start_addr
+
+ .align 2
+init_sr:
+ .long 0x007f0000 /* Supervisor mode, everything masked */
+stack_addr:
+ .long init_thread_union
+kernel_start_addr:
+ .long start_kernel
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
new file mode 100644
index 000000000000..effcacf9d1a2
--- /dev/null
+++ b/arch/avr32/kernel/init_task.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/pgtable.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure. Must be aligned on an 8192-byte boundary.
+ */
+union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c
new file mode 100644
index 000000000000..856f3548e664
--- /dev/null
+++ b/arch/avr32/kernel/irq.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on arch/i386/kernel/irq.c
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sysdev.h>
+
+/*
+ * What to do when a hardware IRQ arrives on an illegal vector; each
+ * architecture has to answer this for itself. On AVR32 the event is
+ * only logged, no hardware is touched.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk("unexpected IRQ %u\n", irq);
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show callback for the interrupt listing.
+ *
+ * @v points to the current position, which doubles as the IRQ number.
+ * Position 0 additionally emits the per-CPU column header. For an IRQ
+ * below NR_IRQS that has at least one action installed, one line is
+ * printed: the IRQ number, the per-CPU counts, and the names of all
+ * chained actions. The descriptor lock is held while the action list is
+ * walked. Always returns 0.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int irq = *(loff_t *)v;
+	int cpu;
+	struct irqaction *act;
+	unsigned long flags;
+
+	if (irq == 0) {
+		seq_puts(p, " ");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "CPU%d ", cpu);
+		seq_putc(p, '\n');
+	}
+
+	if (irq >= NR_IRQS)
+		return 0;
+
+	spin_lock_irqsave(&irq_desc[irq].lock, flags);
+	act = irq_desc[irq].action;
+	if (act) {
+		seq_printf(p, "%3d: ", irq);
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+
+		/* First action name, then any shared handlers. */
+		seq_printf(p, " %s", act->name);
+		while ((act = act->next) != NULL)
+			seq_printf(p, ", %s", act->name);
+
+		seq_putc(p, '\n');
+	}
+	spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+
+	return 0;
+}
+#endif
diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c
new file mode 100644
index 000000000000..6caf9e8d8080
--- /dev/null
+++ b/arch/avr32/kernel/kprobes.c
@@ -0,0 +1,270 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * Based on arch/ppc64/kernel/kprobes.c
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/ocd.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+static unsigned long kprobe_status;
+static struct pt_regs jprobe_saved_regs;
+
+/*
+ * Validate a probe address and snapshot the instruction it will replace.
+ *
+ * Returns -EINVAL when p->addr is odd (not halfword aligned). Otherwise
+ * copies MAX_INSN_SIZE opcodes from p->addr into p->ainsn.insn, stores
+ * the first opcode in p->opcode and returns 0.
+ */
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	if ((unsigned long)p->addr & 0x01) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		return -EINVAL;
+	}
+
+	/* XXX: Might be a good idea to check if p->addr is a valid
+	 * kernel address as well... */
+
+	pr_debug("copy kprobe at %p\n", p->addr);
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	p->opcode = *p->addr;
+
+	return 0;
+}
+
+/*
+ * Arm a probe: overwrite the probed opcode with the breakpoint
+ * instruction and flush the icache for that opcode.
+ */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	unsigned long start = (unsigned long)p->addr;
+
+	pr_debug("arming kprobe at %p\n", p->addr);
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
+}
+
+/*
+ * Disarm a probe: put the saved original opcode back and flush the
+ * icache for that opcode.
+ */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	unsigned long start = (unsigned long)p->addr;
+
+	pr_debug("disarming kprobe at %p\n", p->addr);
+	*p->addr = p->opcode;
+	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
+}
+
+/*
+ * Set up a single step over the probed instruction: enable DC_SS in
+ * the DC debug register and temporarily restore the original opcode at
+ * the probe address. BUGs if the SR_D bit is not set in SR.
+ */
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long insn_addr = (unsigned long)p->addr;
+	unsigned long dc_reg;
+
+	pr_debug("preparing to singlestep over %p (PC=%08lx)\n",
+		 p->addr, regs->pc);
+
+	BUG_ON(!(sysreg_read(SR) & SYSREG_BIT(SR_D)));
+
+	dc_reg = __mfdr(DBGREG_DC);
+	__mtdr(DBGREG_DC, dc_reg | DC_SS);
+
+	/*
+	 * We must run the instruction from its original location
+	 * since it may actually reference PC.
+	 *
+	 * TODO: Do the instruction replacement directly in icache.
+	 */
+	*p->addr = p->opcode;
+	flush_icache_range(insn_addr, insn_addr + sizeof(kprobe_opcode_t));
+}
+
+/*
+ * Undo prepare_singlestep(): clear DC_SS in the DC debug register and
+ * re-insert the breakpoint instruction at the probe address.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long insn_addr = (unsigned long)p->addr;
+	unsigned long dc_reg;
+
+	pr_debug("resuming execution at PC=%08lx\n", regs->pc);
+
+	dc_reg = __mfdr(DBGREG_DC);
+	dc_reg &= ~DC_SS;
+	__mtdr(DBGREG_DC, dc_reg);
+
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range(insn_addr, insn_addr + sizeof(kprobe_opcode_t));
+}
+
+/* Record @p as the probe being handled on this CPU (per-CPU current_kprobe). */
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+ __get_cpu_var(current_kprobe) = p;
+}
+
+/*
+ * Breakpoint handler: decide whether the breakpoint at regs->pc belongs
+ * to a kprobe and, if so, run its pre-handler and set up single-stepping.
+ *
+ * Returns 1 when the breakpoint was consumed by kprobes and 0 when it was
+ * not ours (or could not be handled).
+ *
+ * Preemption is disabled on entry. On the "handled" paths it deliberately
+ * stays disabled; it is re-enabled by post_kprobe_handler() or
+ * kprobe_fault_handler() once the single step has finished. On every
+ * "not handled" path it must be re-enabled here, otherwise each stray
+ * breakpoint would leak one level of preempt_count.
+ */
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	void *addr = (void *)regs->pc;
+	int ret = 0;
+
+	pr_debug("kprobe_handler: kprobe_running=%d\n",
+		 kprobe_running());
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+
+	/* Check that we're not recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kprobe_status == KPROBE_HIT_SS) {
+				printk("FIXME: kprobe hit while single-stepping!\n");
+				goto no_kprobe;
+			}
+
+			printk("FIXME: kprobe hit while handling another kprobe\n");
+			goto no_kprobe;
+		} else {
+			p = kprobe_running();
+			if (p->break_handler && p->break_handler(p, regs))
+				goto ss_probe;
+		}
+		/* If it's not ours, can't be delete race, (we hold lock). */
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p)
+		goto no_kprobe;
+
+	kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p);
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	/* Balance the preempt_disable() above: nothing further will run. */
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Single-step completion handler. If a kprobe is active on this CPU,
+ * runs its post-handler (if any), re-arms the breakpoint, clears the
+ * current kprobe and re-enables preemption.
+ *
+ * Returns 1 when the step belonged to a kprobe, 0 otherwise.
+ */
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *active = kprobe_running();
+
+	pr_debug("post_kprobe_handler, cur=%p\n", active);
+
+	if (active == NULL)
+		return 0;
+
+	if (active->post_handler != NULL) {
+		kprobe_status = KPROBE_HIT_SSDONE;
+		active->post_handler(active, regs, 0);
+	}
+
+	resume_execution(active, regs);
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+/*
+ * Fault raised while a kprobe is active. The caller guarantees that
+ * kprobe_running() is non-NULL.
+ *
+ * Gives the probe's own fault_handler the first chance; returns 1 if it
+ * claims the fault. Otherwise, if the fault happened during the single
+ * step, re-arms the breakpoint and re-enables preemption, then returns 0
+ * so that normal fault handling proceeds.
+ */
+static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *active = kprobe_running();
+
+	pr_debug("kprobe_fault_handler: trapnr=%d\n", trapnr);
+
+	if (active->fault_handler &&
+	    active->fault_handler(active, regs, trapnr))
+		return 1;
+
+	if (!(kprobe_status & KPROBE_HIT_SS))
+		return 0;
+
+	resume_execution(active, regs);
+	preempt_enable_no_resched();
+	return 0;
+}
+
+/*
+ * Die-notifier entry point that routes exceptions to the kprobes
+ * handlers: breakpoints to kprobe_handler(), single-step traps to
+ * post_kprobe_handler(), and faults (only while a kprobe is active) to
+ * kprobe_fault_handler().
+ *
+ * Returns NOTIFY_STOP when the selected handler consumed the event and
+ * NOTIFY_DONE otherwise.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int handled = 0;
+
+	pr_debug("kprobe_exceptions_notify: val=%lu, data=%p\n",
+		 val, data);
+
+	switch (val) {
+	case DIE_BREAKPOINT:
+		handled = kprobe_handler(args->regs);
+		break;
+	case DIE_SSTEP:
+		handled = post_kprobe_handler(args->regs);
+		break;
+	case DIE_FAULT:
+		if (kprobe_running())
+			handled = kprobe_fault_handler(args->regs,
+						       args->trapnr);
+		break;
+	default:
+		break;
+	}
+
+	if (handled)
+		return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * jprobe pre-handler: save the full register set in jprobe_saved_regs
+ * and redirect execution to the jprobe's entry routine by rewriting
+ * regs->pc. Returns 1 so that kprobe_handler() skips single-step setup.
+ */
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+
+	jprobe_saved_regs = *regs;
+
+	/*
+	 * TODO: We should probably save some of the stack here as
+	 * well, since gcc may pass arguments on the stack for certain
+	 * functions (lots of arguments, large aggregates, varargs)
+	 */
+
+	/* Resume at the jprobe handler routine instead of the probed code. */
+	regs->pc = (unsigned long)jp->entry;
+
+	return 1;
+}
+
+/*
+ * Executes a breakpoint instruction. The "memory" clobber keeps the
+ * compiler from moving memory accesses across it. This is the trap that
+ * longjmp_break_handler() expects.
+ */
+void __kprobes jprobe_return(void)
+{
+ asm volatile("breakpoint" ::: "memory");
+}
+
+/*
+ * jprobe break handler: restore the register set captured by
+ * setjmp_pre_handler(). Returns 1 unconditionally so the caller goes on
+ * to single-step the original instruction.
+ */
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	/*
+	 * FIXME - we should ideally be validating that we got here 'cos
+	 * of the "trap" in jprobe_return() above, before restoring the
+	 * saved regs...
+	 */
+	*regs = jprobe_saved_regs;
+
+	return 1;
+}
+
+/*
+ * Architecture init hook for kprobes: writes DC_MM | DC_DBE to the DC
+ * debug register (overwriting, not OR-ing into, its previous contents)
+ * and logs that monitor mode is being enabled. Always returns 0.
+ */
+int __init arch_init_kprobes(void)
+{
+ printk("KPROBES: Enabling monitor mode (MM|DBE)...\n");
+ __mtdr(DBGREG_DC, DC_MM | DC_DBE);
+
+ /* TODO: Register kretprobe trampoline */
+ return 0;
+}
diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
new file mode 100644
index 000000000000..dfc32f2817b6
--- /dev/null
+++ b/arch/avr32/kernel/module.c
@@ -0,0 +1,324 @@
+/*
+ * AVR32-specific kernel module loader
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * GOT initialization parts are based on the s390 version
+ * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH,
+ * IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+
+/*
+ * Allocate memory for a module with vmalloc(). A zero-sized request
+ * yields NULL without calling the allocator.
+ */
+void *module_alloc(unsigned long size)
+{
+ return size ? vmalloc(size) : NULL;
+}
+
+/*
+ * Free a module region together with the module's per-symbol GOT
+ * bookkeeping array (mod->arch.syminfo).
+ */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(mod->arch.syminfo);
+ /* cleared so the stale pointer is not left behind */
+ mod->arch.syminfo = NULL;
+
+ vfree(module_region);
+ /* FIXME: if module_region == mod->init_region, trim exception
+ * table entries. */
+}
+
+/*
+ * Inspect one RELA entry. For GOT-type relocations, reject a non-zero
+ * addend with -ENOEXEC and otherwise make sure the referenced symbol
+ * owns a GOT slot, growing module->arch.got_size when a new slot is
+ * handed out. All other relocation types are ignored. Returns 0 on
+ * success.
+ */
+static inline int check_rela(Elf32_Rela *rela, struct module *module,
+ char *strings, Elf32_Sym *symbols)
+{
+ unsigned int symidx = ELF32_R_SYM(rela->r_info);
+ struct mod_arch_syminfo *info = module->arch.syminfo + symidx;
+ const char *symname;
+
+ switch (ELF32_R_TYPE(rela->r_info)) {
+ case R_AVR32_GOT32:
+ case R_AVR32_GOT16:
+ case R_AVR32_GOT8:
+ case R_AVR32_GOT21S:
+ case R_AVR32_GOT18SW: /* mcall */
+ case R_AVR32_GOT16S: /* ld.w */
+ break;
+ default:
+ /* not a GOT relocation, nothing to reserve */
+ return 0;
+ }
+
+ symname = strings + symbols[symidx].st_name;
+
+ if (rela->r_addend != 0) {
+ printk(KERN_ERR
+ "GOT relocation against %s at offset %u with addend\n",
+ symname, rela->r_offset);
+ return -ENOEXEC;
+ }
+
+ /* -1UL marks a symbol that has no GOT slot yet */
+ if (info->got_offset == -1UL) {
+ info->got_offset = module->arch.got_size;
+ module->arch.got_size += sizeof(void *);
+ }
+ pr_debug("GOT[%3lu] %s\n", info->got_offset, symname);
+
+ return 0;
+}
+
+/*
+ * Pre-layout pass over the module's ELF image:
+ * - locate the symbol table,
+ * - allocate one mod_arch_syminfo per symbol,
+ * - run check_rela() on every SHT_RELA entry to size the GOT,
+ * - append room for the GOT to the end of the core section.
+ *
+ * Returns 0 on success, -ENOEXEC if there is no symbol table, -ENOMEM
+ * if the syminfo array cannot be allocated, or the error reported by
+ * check_rela() (the syminfo array is freed again in that case).
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *module)
+{
+ Elf32_Shdr *symtab;
+ Elf32_Sym *symbols;
+ Elf32_Rela *rela;
+ char *strings;
+ int nrela, i, j;
+ int ret;
+
+ /* Find the symbol table */
+ symtab = NULL;
+ /* the break below only leaves the switch, so the last SHT_SYMTAB wins */
+ for (i = 0; i < hdr->e_shnum; i++)
+ switch (sechdrs[i].sh_type) {
+ case SHT_SYMTAB:
+ symtab = &sechdrs[i];
+ break;
+ }
+ if (!symtab) {
+ printk(KERN_ERR "module %s: no symbol table\n", module->name);
+ return -ENOEXEC;
+ }
+
+ /* Allocate room for one syminfo structure per symbol. */
+ module->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
+ module->arch.syminfo = vmalloc(module->arch.nsyms
+ * sizeof(struct mod_arch_syminfo));
+ if (!module->arch.syminfo)
+ return -ENOMEM;
+
+ symbols = (void *)hdr + symtab->sh_offset;
+ strings = (void *)hdr + sechdrs[symtab->sh_link].sh_offset;
+ /* mark every symbol as "no GOT slot, not initialized" */
+ for (i = 0; i < module->arch.nsyms; i++) {
+ if (symbols[i].st_shndx == SHN_UNDEF &&
+ strcmp(strings + symbols[i].st_name,
+ "_GLOBAL_OFFSET_TABLE_") == 0)
+ /* "Define" it as absolute. */
+ symbols[i].st_shndx = SHN_ABS;
+ module->arch.syminfo[i].got_offset = -1UL;
+ module->arch.syminfo[i].got_initialized = 0;
+ }
+
+ /* Allocate GOT entries for symbols that need it. */
+ module->arch.got_size = 0;
+ for (i = 0; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+ nrela = sechdrs[i].sh_size / sizeof(Elf32_Rela);
+ rela = (void *)hdr + sechdrs[i].sh_offset;
+ for (j = 0; j < nrela; j++) {
+ ret = check_rela(rela + j, module,
+ strings, symbols);
+ if (ret)
+ goto out_free_syminfo;
+ }
+ }
+
+ /*
+ * Increase core size to make room for GOT and set start
+ * offset for GOT.
+ */
+ module->core_size = ALIGN(module->core_size, 4);
+ module->arch.got_offset = module->core_size;
+ module->core_size += module->arch.got_size;
+
+ return 0;
+
+out_free_syminfo:
+ vfree(module->arch.syminfo);
+ module->arch.syminfo = NULL;
+
+ return ret;
+}
+
+/*
+ * Log that a relocation value does not fit its field and return
+ * -ENOEXEC so the caller can abort with a single return statement.
+ */
+static inline int reloc_overflow(struct module *module, const char *reloc_name,
+ Elf32_Addr relocation)
+{
+ printk(KERN_ERR "module %s: Value %lx does not fit relocation %s\n",
+ module->name, (unsigned long)relocation, reloc_name);
+ return -ENOEXEC;
+}
+
+/*
+ * Read / write a 16-bit value at an arbitrary pointer. The argument is
+ * parenthesized so that an expression such as "p + 2" is evaluated
+ * before the cast is applied to it.
+ */
+#define get_u16(loc) (*((uint16_t *)(loc)))
+#define put_u16(loc, val) (*((uint16_t *)(loc)) = (val))
+
+/*
+ * Apply every RELA entry of section relindex to the section it targets
+ * (relsec->sh_info), resolving symbols through section symindex.
+ *
+ * For GOT-type relocations the symbol's GOT slot is filled in on first
+ * use and the value patched into the instruction becomes the slot's
+ * offset within the GOT. PC-relative and GOT values are range-checked
+ * before being merged into the instruction word.
+ *
+ * Returns 0 on success or -ENOEXEC on an out-of-range value or an
+ * unknown relocation type. strtab is not used.
+ */
+int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relindex,
+ struct module *module)
+{
+ Elf32_Shdr *symsec = sechdrs + symindex;
+ Elf32_Shdr *relsec = sechdrs + relindex;
+ Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
+ Elf32_Rela *rel = (void *)relsec->sh_addr;
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) {
+ struct mod_arch_syminfo *info;
+ Elf32_Sym *sym;
+ Elf32_Addr relocation;
+ uint32_t *location;
+ uint32_t value;
+
+ /* where to patch, and the plain S + A value to patch in */
+ location = (void *)dstsec->sh_addr + rel->r_offset;
+ sym = (Elf32_Sym *)symsec->sh_addr + ELF32_R_SYM(rel->r_info);
+ relocation = sym->st_value + rel->r_addend;
+
+ info = module->arch.syminfo + ELF32_R_SYM(rel->r_info);
+
+ /* Initialize GOT entry if necessary */
+ switch (ELF32_R_TYPE(rel->r_info)) {
+ case R_AVR32_GOT32:
+ case R_AVR32_GOT16:
+ case R_AVR32_GOT8:
+ case R_AVR32_GOT21S:
+ case R_AVR32_GOT18SW:
+ case R_AVR32_GOT16S:
+ if (!info->got_initialized) {
+ Elf32_Addr *gotent;
+
+ gotent = (module->module_core
+ + module->arch.got_offset
+ + info->got_offset);
+ *gotent = relocation;
+ info->got_initialized = 1;
+ }
+
+ /* from here on the value is the slot offset inside the GOT */
+ relocation = info->got_offset;
+ break;
+ }
+
+ switch (ELF32_R_TYPE(rel->r_info)) {
+ case R_AVR32_32:
+ case R_AVR32_32_CPENT:
+ *location = relocation;
+ break;
+ case R_AVR32_22H_PCREL:
+ relocation -= (Elf32_Addr)location;
+ if ((relocation & 0xffe00001) != 0
+ && (relocation & 0xffc00001) != 0xffc00000)
+ return reloc_overflow(module,
+ "R_AVR32_22H_PCREL",
+ relocation);
+ relocation >>= 1;
+
+ /* scatter the halfword offset over the instruction's fields */
+ value = *location;
+ value = ((value & 0xe1ef0000)
+ | (relocation & 0xffff)
+ | ((relocation & 0x10000) << 4)
+ | ((relocation & 0x1e0000) << 8));
+ *location = value;
+ break;
+ case R_AVR32_11H_PCREL:
+ relocation -= (Elf32_Addr)location;
+ if ((relocation & 0xfffffc01) != 0
+ && (relocation & 0xfffff801) != 0xfffff800)
+ return reloc_overflow(module,
+ "R_AVR32_11H_PCREL",
+ relocation);
+ value = get_u16(location);
+ value = ((value & 0xf00c)
+ | ((relocation & 0x1fe) << 3)
+ | ((relocation & 0x600) >> 9));
+ put_u16(location, value);
+ break;
+ case R_AVR32_9H_PCREL:
+ relocation -= (Elf32_Addr)location;
+ if ((relocation & 0xffffff01) != 0
+ && (relocation & 0xfffffe01) != 0xfffffe00)
+ return reloc_overflow(module,
+ "R_AVR32_9H_PCREL",
+ relocation);
+ value = get_u16(location);
+ value = ((value & 0xf00f)
+ | ((relocation & 0x1fe) << 3));
+ put_u16(location, value);
+ break;
+ case R_AVR32_9UW_PCREL:
+ /* unsigned, relative to the word-aligned location */
+ relocation -= ((Elf32_Addr)location) & 0xfffffffc;
+ if ((relocation & 0xfffffc03) != 0)
+ return reloc_overflow(module,
+ "R_AVR32_9UW_PCREL",
+ relocation);
+ value = get_u16(location);
+ value = ((value & 0xf80f)
+ | ((relocation & 0x1fc) << 2));
+ put_u16(location, value);
+ break;
+ case R_AVR32_GOTPC:
+ /*
+ * R6 = PC - (PC - GOT)
+ *
+ * At this point, relocation contains the
+ * value of PC. Just subtract the value of
+ * GOT, and we're done.
+ */
+ pr_debug("GOTPC: PC=0x%lx, got_offset=0x%lx, core=0x%p\n",
+ relocation, module->arch.got_offset,
+ module->module_core);
+ relocation -= ((unsigned long)module->module_core
+ + module->arch.got_offset);
+ *location = relocation;
+ break;
+ case R_AVR32_GOT18SW:
+ /*
+ * NOTE(review): the mask 0xfffc0003 clears bits 16-17, so
+ * the masked value can never equal 0xffff0000; the second
+ * comparison is always true and any value with bits 17-31
+ * set is rejected. Harmless as long as GOT offsets are
+ * non-negative, but the constant looks unintended.
+ */
+ if ((relocation & 0xfffe0003) != 0
+ && (relocation & 0xfffc0003) != 0xffff0000)
+ return reloc_overflow(module, "R_AVR32_GOT18SW",
+ relocation);
+ relocation >>= 2;
+ /* fall through */
+ case R_AVR32_GOT16S:
+ if ((relocation & 0xffff8000) != 0
+ && (relocation & 0xffff0000) != 0xffff0000)
+ return reloc_overflow(module, "R_AVR32_GOT16S",
+ relocation);
+ pr_debug("GOT reloc @ 0x%lx -> %lu\n",
+ rel->r_offset, relocation);
+ /* low 16 bits of the instruction carry the GOT offset */
+ value = *location;
+ value = ((value & 0xffff0000)
+ | (relocation & 0xffff));
+ *location = value;
+ break;
+
+ default:
+ printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+ module->name, ELF32_R_TYPE(rel->r_info));
+ return -ENOEXEC;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * REL-style (addend-less) relocations are not handled here: log an
+ * error naming the module and fail with -ENOEXEC.
+ */
+int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relindex,
+ struct module *module)
+{
+ printk(KERN_ERR "module %s: REL relocations are not supported\n",
+ module->name);
+ return -ENOEXEC;
+}
+
+/*
+ * Final arch hook for module loading: drop the per-symbol GOT
+ * bookkeeping array and report success.
+ */
+int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+ struct module *module)
+{
+ vfree(module->arch.syminfo);
+ module->arch.syminfo = NULL;
+
+ return 0;
+}
+
+/* Arch hook for module cleanup; intentionally empty here. */
+void module_arch_cleanup(struct module *module)
+{
+
+}
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
new file mode 100644
index 000000000000..317dc50945f2
--- /dev/null
+++ b/arch/avr32/kernel/process.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/fs.h>
+#include <linux/ptrace.h>
+#include <linux/reboot.h>
+#include <linux/unistd.h>
+
+#include <asm/sysreg.h>
+#include <asm/ocd.h>
+
+void (*pm_power_off)(void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * This file handles the architecture-dependent parts of process handling.
+ */
+
+/*
+ * The idle loop: spin with cpu_relax() until a reschedule is needed,
+ * then call schedule() with preemption re-enabled around the call.
+ */
+void cpu_idle(void)
+{
+ /* endless idle loop with no priority at all */
+ for (;;) {
+ /* TODO: Enter sleep mode */
+ for (;;) {
+ if (need_resched())
+ break;
+ cpu_relax();
+ }
+
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ }
+}
+
+/* Halt hook; does nothing in this version. */
+void machine_halt(void)
+{
+}
+
+/* Power-off hook; does nothing in this version. */
+void machine_power_off(void)
+{
+}
+
+/*
+ * Restart the machine by writing DC_DBE and then DC_RES to the
+ * DBGREG_DC debug register, then spin forever. cmd is ignored.
+ */
+void machine_restart(char *cmd)
+{
+ __mtdr(DBGREG_DC, DC_DBE);
+ __mtdr(DBGREG_DC, DC_RES);
+
+ /* not expected to get past the reset request */
+ for (;;)
+ ;
+}
+
+/*
+ * PC is actually discarded when returning from a system call -- the
+ * return address must be stored in LR. This function will make sure
+ * LR points to do_exit before starting the thread.
+ *
+ * Also, when returning from fork(), r12 is 0, so we must copy the
+ * argument as well.
+ *
+ * r0 : The argument to the main thread function (copied to r12)
+ * r1 : The address of the main thread function (loaded into pc)
+ * r2 : The address of do_exit (loaded into lr)
+ */
+asmlinkage extern void kernel_thread_helper(void);
+__asm__(" .type kernel_thread_helper, @function\n"
+ "kernel_thread_helper:\n"
+ " mov r12, r0\n"
+ " mov lr, r2\n"
+ " mov pc, r1\n"
+ " .size kernel_thread_helper, . - kernel_thread_helper");
+
+/*
+ * Start a kernel thread that runs fn(arg). A zeroed register frame is
+ * prepared so that the new thread begins in kernel_thread_helper with
+ * arg in r0, fn in r1 and do_exit in r2, in supervisor mode. Returns
+ * whatever do_fork() returns.
+ */
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+ unsigned long helper = (unsigned long)kernel_thread_helper;
+ unsigned long clone_flags = flags | CLONE_VM | CLONE_UNTRACED;
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+
+ /* start in the helper, in supervisor mode */
+ regs.sr = MODE_SUPERVISOR;
+ regs.pc = helper;
+ regs.lr = helper;
+
+ /* inputs consumed by kernel_thread_helper */
+ regs.r0 = (unsigned long)arg;
+ regs.r1 = (unsigned long)fn;
+ regs.r2 = (unsigned long)do_exit;
+
+ return do_fork(clone_flags, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * Free current thread data structures etc. There is no per-thread
+ * state to release here, so this is a stub.
+ */
+void exit_thread(void)
+{
+ /* nothing to do */
+}
+
+/* Thread-flush hook; stub in this version. */
+void flush_thread(void)
+{
+ /* nothing to do */
+}
+
+/* Hook for releasing a dead task's thread state; stub in this version. */
+void release_thread(struct task_struct *dead_task)
+{
+ /* do nothing */
+}
+
+/* Names for the eight CPU mode values, indexed by the mode field of SR */
+static const char *cpu_modes[] = {
+ "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1",
+ "Interrupt level 2", "Interrupt level 3", "Exception", "NMI"
+};
+
+/*
+ * Dump a register frame to the kernel log: PC/LR (symbolic and raw),
+ * the general registers, the status flags and mode bits decoded from
+ * SR, the CPU mode name, and finally a stack trace.
+ */
+void show_regs(struct pt_regs *regs)
+{
+ unsigned long sp = regs->sp;
+ unsigned long lr = regs->lr;
+ unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT;
+
+ /*
+ * For a kernel-mode frame the saved sp is not used; the stack
+ * pointer is taken to be the address just past the frame itself.
+ */
+ if (!user_mode(regs))
+ sp = (unsigned long)regs + FRAME_SIZE_FULL;
+
+ print_symbol("PC is at %s\n", instruction_pointer(regs));
+ print_symbol("LR is at %s\n", lr);
+ printk("pc : [<%08lx>] lr : [<%08lx>] %s\n"
+ "sp : %08lx r12: %08lx r11: %08lx\n",
+ instruction_pointer(regs),
+ lr, print_tainted(), sp, regs->r12, regs->r11);
+ printk("r10: %08lx r9 : %08lx r8 : %08lx\n",
+ regs->r10, regs->r9, regs->r8);
+ printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n",
+ regs->r7, regs->r6, regs->r5, regs->r4);
+ printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n",
+ regs->r3, regs->r2, regs->r1, regs->r0);
+ /* upper case = bit set, lower case = bit clear */
+ printk("Flags: %c%c%c%c%c\n",
+ regs->sr & SR_Q ? 'Q' : 'q',
+ regs->sr & SR_V ? 'V' : 'v',
+ regs->sr & SR_N ? 'N' : 'n',
+ regs->sr & SR_Z ? 'Z' : 'z',
+ regs->sr & SR_C ? 'C' : 'c');
+ /* interrupt mask bits print their level digit when set, '.' otherwise */
+ printk("Mode bits: %c%c%c%c%c%c%c%c%c\n",
+ regs->sr & SR_H ? 'H' : 'h',
+ regs->sr & SR_R ? 'R' : 'r',
+ regs->sr & SR_J ? 'J' : 'j',
+ regs->sr & SR_EM ? 'E' : 'e',
+ regs->sr & SR_I3M ? '3' : '.',
+ regs->sr & SR_I2M ? '2' : '.',
+ regs->sr & SR_I1M ? '1' : '.',
+ regs->sr & SR_I0M ? '0' : '.',
+ regs->sr & SR_GM ? 'G' : 'g');
+ /* NOTE(review): assumes the mode field is at most 3 bits wide (8 names) */
+ printk("CPU Mode: %s\n", cpu_modes[mode]);
+
+ show_trace(NULL, (unsigned long *)sp, regs);
+}
+EXPORT_SYMBOL(show_regs);
+
+/*
+ * Fill in the fpu structure for a core dump. This is easy -- we don't
+ * have any. Returns 0 and leaves *fpu untouched.
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+ /* Not valid */
+ return 0;
+}
+
+asmlinkage void ret_from_fork(void);
+
+/*
+ * Set up the kernel stack and saved CPU context of a newly forked
+ * task p. The parent's register frame is copied to the top of the
+ * child's stack, the child's return value (r12) is forced to 0, and
+ * the child is arranged to resume in ret_from_fork. nr, clone_flags
+ * and unused are not looked at. Always returns 0.
+ */
+int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+ unsigned long unused,
+ struct task_struct *p, struct pt_regs *regs)
+{
+ struct pt_regs *childregs;
+
+ /* the child's frame is the last pt_regs-sized slot of its stack area */
+ childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1;
+ *childregs = *regs;
+
+ /* user threads get the requested stack, kernel threads the top of their own */
+ if (user_mode(regs))
+ childregs->sp = usp;
+ else
+ childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE;
+
+ childregs->r12 = 0; /* Set return value for child */
+
+ p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM;
+ p->thread.cpu_context.ksp = (unsigned long)childregs;
+ p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
+
+ return 0;
+}
+
+/*
+ * fork(): the child starts on the same stack pointer as the caller,
+ * taken from the saved register frame in regs.
+ */
+asmlinkage int sys_fork(struct pt_regs *regs)
+{
+ unsigned long parent_sp = regs->sp;
+
+ return do_fork(SIGCHLD, parent_sp, regs, 0, NULL, NULL);
+}
+
+/*
+ * clone(): like fork, but with caller-supplied flags and an optional
+ * new stack pointer. A newsp of 0 means "keep using the caller's sp".
+ */
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+ unsigned long parent_tidptr,
+ unsigned long child_tidptr, struct pt_regs *regs)
+{
+ unsigned long child_sp = newsp ? newsp : regs->sp;
+ int __user *ptid = (int __user *)parent_tidptr;
+ int __user *ctid = (int __user *)child_tidptr;
+
+ return do_fork(clone_flags, child_sp, regs, 0, ptid, ctid);
+}
+
+/* vfork(): fork with CLONE_VFORK | CLONE_VM, on the caller's stack pointer. */
+asmlinkage int sys_vfork(struct pt_regs *regs)
+{
+ unsigned long vfork_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;
+
+ return do_fork(vfork_flags, regs->sp, regs, 0, NULL, NULL);
+}
+
+/*
+ * execve(): copy the file name in from user space and hand over to
+ * do_execve(). Returns the getname() error if the name cannot be
+ * fetched, otherwise the do_execve() result. PT_DTRACE is cleared on
+ * success.
+ */
+asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
+ char __user *__user *uenvp, struct pt_regs *regs)
+{
+ char *filename = getname(ufilename);
+ int error;
+
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
+
+ error = do_execve(filename, uargv, uenvp, regs);
+ if (!error)
+ current->ptrace &= ~PT_DTRACE;
+
+ /* the name buffer is released on success and failure alike */
+ putname(filename);
+
+ return error;
+}
+
+
+/*
+ * This function is supposed to answer the question "who called
+ * schedule()?"
+ *
+ * Returns 0 for a NULL task, the current task or a running task.
+ * Otherwise returns the task's saved pc, or, when that pc lies inside
+ * the scheduler, the word read from the task's stack as described
+ * below.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long pc;
+ unsigned long stack_page;
+
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+
+ stack_page = (unsigned long)p->thread_info;
+ BUG_ON(!stack_page);
+
+ /*
+ * The stored value of PC is either the address right after
+ * the call to __switch_to() or ret_from_fork.
+ */
+ pc = thread_saved_pc(p);
+ if (in_sched_functions(pc)) {
+#ifdef CONFIG_FRAME_POINTER
+ /* r7 serves as the frame pointer here; it must lie in p's stack */
+ unsigned long fp = p->thread.cpu_context.r7;
+ BUG_ON(fp < stack_page || fp > (THREAD_SIZE + stack_page));
+ pc = *(unsigned long *)fp;
+#else
+ /*
+ * We depend on the frame size of schedule here, which
+ * is actually quite ugly. It might be possible to
+ * determine the frame size automatically at build
+ * time by doing this:
+ * - compile sched.c
+ * - disassemble the resulting sched.o
+ * - look for 'sub sp,??' shortly after '<schedule>:'
+ */
+ /* 16 is the hard-coded frame size the comment above refers to */
+ unsigned long sp = p->thread.cpu_context.ksp + 16;
+ BUG_ON(sp < stack_page || sp > (THREAD_SIZE + stack_page));
+ pc = *(unsigned long *)sp;
+#endif
+ }
+
+ return pc;
+}
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
new file mode 100644
index 000000000000..3c89e59029ab
--- /dev/null
+++ b/arch/avr32/kernel/ptrace.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/unistd.h>
+#include <linux/notifier.h>
+
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/ocd.h>
+#include <asm/mmu_context.h>
+#include <asm/kdebug.h>
+
+/*
+ * Locate the register frame of tsk: the last pt_regs-sized slot at the
+ * top of the task's THREAD_SIZE stack area.
+ */
+static struct pt_regs *get_user_regs(struct task_struct *tsk)
+{
+ unsigned long stack_top = (unsigned long)tsk->thread_info + THREAD_SIZE;
+
+ return (struct pt_regs *)stack_top - 1;
+}
+
+/*
+ * Arm single-stepping for tsk by setting TIF_SINGLE_STEP. If the
+ * task's saved SR does not have SR_D set, TIF_BREAKPOINT is set as
+ * well.
+ */
+static void ptrace_single_step(struct task_struct *tsk)
+{
+ pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n",
+ tsk->pid, tsk->thread.cpu_context.sr);
+ if (!(tsk->thread.cpu_context.sr & SR_D)) {
+ /*
+ * Set a breakpoint at the current pc to force the
+ * process into debug mode. The syscall/exception
+ * exit code will set a breakpoint at the return
+ * address when this flag is set.
+ */
+ pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n");
+ set_tsk_thread_flag(tsk, TIF_BREAKPOINT);
+ }
+
+ /* The monitor code will do the actual step for us */
+ set_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching
+ *
+ * Make sure any single step bits, etc. are not set. Only
+ * TIF_SINGLE_STEP is cleared here; TIF_BREAKPOINT is left as it is.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ clear_tsk_thread_flag(child, TIF_SINGLE_STEP);
+}
+
+/*
+ * Handle hitting a breakpoint: force a SIGTRAP (si_code TRAP_BRKPT)
+ * on tsk, with si_addr set to the pc found in regs.
+ */
+static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+{
+ /* NOTE(review): only the four fields below are set; the rest of info is uninitialized */
+ siginfo_t info;
+
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_BRKPT;
+ info.si_addr = (void __user *)instruction_pointer(regs);
+
+ pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n",
+ tsk->pid, info.si_addr);
+ force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/*
+ * Fetch one word of the task's "struct user" at byte offset "offset"
+ * and store it to the user pointer "data". Offsets inside the pt_regs
+ * part are served from the register frame on the kernel stack; the
+ * rest of struct user reads as zero. Misaligned or out-of-range
+ * offsets yield -EIO; otherwise the put_user() result is returned.
+ */
+static int ptrace_read_user(struct task_struct *tsk, unsigned long offset,
+ unsigned long __user *data)
+{
+ unsigned long word = 0;
+
+ pr_debug("ptrace_read_user(%p, %#lx, %p)\n",
+ tsk, offset, data);
+
+ if ((offset & 3) != 0 || offset >= sizeof(struct user)) {
+ printk("ptrace_read_user: invalid offset 0x%08lx\n", offset);
+ return -EIO;
+ }
+
+ if (offset < sizeof(struct pt_regs)) {
+ unsigned long *frame = (unsigned long *)get_user_regs(tsk);
+
+ word = frame[offset / sizeof(frame[0])];
+ }
+
+ return put_user(word, data);
+}
+
+/*
+ * Store "value" at byte offset "offset" of the task's "struct user".
+ * Only the pt_regs part, which lives on the kernel stack, is backed by
+ * storage; writes beyond it are accepted and dropped. Misaligned or
+ * out-of-range offsets yield -EIO, everything else returns 0.
+ */
+static int ptrace_write_user(struct task_struct *tsk, unsigned long offset,
+ unsigned long value)
+{
+ if ((offset & 3) != 0 || offset >= sizeof(struct user)) {
+ printk("ptrace_write_user: invalid offset 0x%08lx\n", offset);
+ return -EIO;
+ }
+
+ if (offset < sizeof(struct pt_regs)) {
+ unsigned long *frame = (unsigned long *)get_user_regs(tsk);
+
+ frame[offset / sizeof(frame[0])] = value;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy the whole register frame of tsk out to the user buffer uregs.
+ * Returns 0, or -EFAULT if the copy faults.
+ */
+static int ptrace_getregs(struct task_struct *tsk, void __user *uregs)
+{
+ if (copy_to_user(uregs, get_user_regs(tsk), sizeof(struct pt_regs)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Replace the register frame of tsk with the one supplied in the user
+ * buffer uregs. Returns -EFAULT if the buffer cannot be read, -EINVAL
+ * if valid_user_regs() rejects the new frame, and 0 once the frame has
+ * been installed.
+ */
+static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs)
+{
+ struct pt_regs newregs;
+
+ if (copy_from_user(&newregs, uregs, sizeof(newregs)) != 0)
+ return -EFAULT;
+
+ /* validate before touching the live frame */
+ if (!valid_user_regs(&newregs))
+ return -EINVAL;
+
+ *get_user_regs(tsk) = newregs;
+ return 0;
+}
+
+/*
+ * Architecture part of ptrace(): perform "request" on the traced task
+ * "child". addr and data are interpreted per request. Requests not
+ * listed here are passed on to the generic ptrace_request(). Returns 0
+ * or a negative errno (-EIO for failed memory accesses and invalid
+ * signal numbers).
+ */
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+ unsigned long tmp;
+ int ret;
+
+ pr_debug("arch_ptrace(%ld, %ld, %#lx, %#lx)\n",
+ request, child->pid, addr, data);
+
+ /* done on every call, whatever the request */
+ pr_debug("ptrace: Enabling monitor mode...\n");
+ __mtdr(DBGREG_DC, __mfdr(DBGREG_DC) | DC_MM | DC_DBE);
+
+ switch (request) {
+ /* Read the word at location addr in the child process */
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ ret = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+ if (ret == sizeof(tmp))
+ ret = put_user(tmp, (unsigned long __user *)data);
+ else
+ ret = -EIO;
+ break;
+
+ case PTRACE_PEEKUSR:
+ ret = ptrace_read_user(child, addr,
+ (unsigned long __user *)data);
+ break;
+
+ /* Write the word in data at location addr */
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+ if (ret == sizeof(data))
+ ret = 0;
+ else
+ ret = -EIO;
+ break;
+
+ case PTRACE_POKEUSR:
+ ret = ptrace_write_user(child, addr, data);
+ break;
+
+ /* continue and stop at next (return from) syscall */
+ case PTRACE_SYSCALL:
+ /* restart after signal */
+ case PTRACE_CONT:
+ ret = -EIO;
+ if (!valid_signal(data))
+ break;
+ if (request == PTRACE_SYSCALL)
+ set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ /* data is the signal to deliver on resume (0 for none) */
+ child->exit_code = data;
+ /* XXX: Are we sure no breakpoints are active here? */
+ wake_up_process(child);
+ ret = 0;
+ break;
+
+ /*
+ * Make the child exit. Best I can do is send it a
+ * SIGKILL. Perhaps it should be put in the status that it
+ * wants to exit.
+ */
+ case PTRACE_KILL:
+ ret = 0;
+ if (child->exit_state == EXIT_ZOMBIE)
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+ break;
+
+ /*
+ * execute single instruction.
+ */
+ case PTRACE_SINGLESTEP:
+ ret = -EIO;
+ if (!valid_signal(data))
+ break;
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ ptrace_single_step(child);
+ child->exit_code = data;
+ wake_up_process(child);
+ ret = 0;
+ break;
+
+ /* Detach a process that was attached */
+ case PTRACE_DETACH:
+ ret = ptrace_detach(child, data);
+ break;
+
+ case PTRACE_GETREGS:
+ ret = ptrace_getregs(child, (void __user *)data);
+ break;
+
+ case PTRACE_SETREGS:
+ ret = ptrace_setregs(child, (const void __user *)data);
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+
+ pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n", ret, __mfdr(DBGREG_DC));
+ return ret;
+}
+
+/*
+ * Syscall tracing hook. Does nothing unless the current task has
+ * TIF_SYSCALL_TRACE set and is being ptraced; otherwise it notifies
+ * the tracer with SIGTRAP and, once resumed, delivers the signal the
+ * tracer left in current->exit_code (if any).
+ */
+asmlinkage void syscall_trace(void)
+{
+ pr_debug("syscall_trace called\n");
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ return;
+ if (!(current->ptrace & PT_PTRACED))
+ return;
+
+ pr_debug("syscall_trace: notifying parent\n");
+ /* The 0x80 provides a way for the tracing parent to
+ * distinguish between a syscall stop and SIGTRAP delivery */
+ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+ ? 0x80 : 0));
+
+ /*
+ * this isn't the same as continuing with a signal, but it
+ * will do for normal use. strace only continues with a
+ * signal if the stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ pr_debug("syscall_trace: sending signal %d to PID %u\n",
+ current->exit_code, current->pid);
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
+
+/*
+ * Debug exception entry for privileged code. The event is classified
+ * as a single step (DS_SSS set in the DS debug register) or a
+ * breakpoint and offered to the die notifier chain first. If nobody
+ * claims it, a single-step event is handled by turning off hardware
+ * single stepping and flagging the thread so the step is finished on
+ * the way out; any other unclaimed event is fatal.
+ */
+asmlinkage void do_debug_priv(struct pt_regs *regs)
+{
+ unsigned long dc, ds;
+ unsigned long die_val;
+
+ ds = __mfdr(DBGREG_DS);
+
+ pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+ if (ds & DS_SSS)
+ die_val = DIE_SSTEP;
+ else
+ die_val = DIE_BREAKPOINT;
+
+ /* let registered notifiers have the first go at the event */
+ if (notify_die(die_val, regs, 0, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (likely(ds & DS_SSS)) {
+ extern void itlb_miss(void);
+ extern void tlb_miss_common(void);
+ struct thread_info *ti;
+
+ /* stop hardware single stepping */
+ dc = __mfdr(DBGREG_DC);
+ dc &= ~DC_SS;
+ __mtdr(DBGREG_DC, dc);
+
+ ti = current_thread_info();
+ ti->flags |= _TIF_BREAKPOINT;
+
+ /* The TLB miss handlers don't check thread flags */
+ if ((regs->pc >= (unsigned long)&itlb_miss)
+ && (regs->pc <= (unsigned long)&tlb_miss_common)) {
+ __mtdr(DBGREG_BWA2A, sysreg_read(RAR_EX));
+ __mtdr(DBGREG_BWC2A, 0x40000001 | (get_asid() << 1));
+ }
+
+ /*
+ * If we're running in supervisor mode, the breakpoint
+ * will take us where we want directly, no need to
+ * single step.
+ */
+ if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR)
+ /*
+ * flags is a bit mask, so OR in the mask form
+ * (_TIF_*), as done for _TIF_BREAKPOINT above;
+ * TIF_SINGLE_STEP itself is a bit number.
+ */
+ ti->flags |= _TIF_SINGLE_STEP;
+ } else {
+ panic("Unable to handle debug trap at pc = %08lx\n",
+ regs->pc);
+ }
+}
+
+/*
+ * Handle breakpoints, single steps and other debuggy things. To keep
+ * things simple initially, we run with interrupts and exceptions
+ * disabled all the time.
+ *
+ * A pending TIF_BREAKPOINT is consumed first. Then, if the thread is
+ * single stepping, SIGTRAP is raised only once the hardware reports a
+ * completed step (DS_SSS); without TIF_SINGLE_STEP the event is
+ * treated as a regular breakpoint and SIGTRAP is raised right away.
+ */
+asmlinkage void do_debug(struct pt_regs *regs)
+{
+ unsigned long dc, ds;
+
+ ds = __mfdr(DBGREG_DS);
+ pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+ if (test_thread_flag(TIF_BREAKPOINT)) {
+ pr_debug("TIF_BREAKPOINT set\n");
+ /* We're taking care of it */
+ clear_thread_flag(TIF_BREAKPOINT);
+ __mtdr(DBGREG_BWC2A, 0);
+ }
+
+ if (test_thread_flag(TIF_SINGLE_STEP)) {
+ pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds);
+ if (ds & DS_SSS) {
+ /* step done: turn stepping off and report it */
+ dc = __mfdr(DBGREG_DC);
+ dc &= ~DC_SS;
+ __mtdr(DBGREG_DC, dc);
+
+ clear_thread_flag(TIF_SINGLE_STEP);
+ ptrace_break(current, regs);
+ }
+ } else {
+ /* regular breakpoint */
+ ptrace_break(current, regs);
+ }
+}
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
new file mode 100644
index 000000000000..1e2705a05016
--- /dev/null
+++ b/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
+/*
+ * AVR32 semaphore implementation.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/i386/kernel/semaphore.c
+ * Copyright (C) 1999 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+/* Contended release path: wake up a waiter on the semaphore's wait queue. */
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+/*
+ * Contended acquire path: queue the current task as an exclusive
+ * waiter and sleep uninterruptibly until the semaphore count can be
+ * taken. sem->sleepers and the queue are manipulated under the wait
+ * queue's spinlock.
+ */
+void __sched __down(struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ /* state is set again under the lock, before re-checking the count */
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ /* pass the wakeup on so another waiter re-evaluates the count */
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+EXPORT_SYMBOL(__down);
+
+/*
+ * Contended acquire path, interruptible variant: as __down(), but the
+ * task sleeps in TASK_INTERRUPTIBLE and gives up when a signal is
+ * pending. Returns 0 when the semaphore was taken, -EINTR when the
+ * wait was abandoned because of a signal.
+ */
+int __sched __down_interruptible(struct semaphore *sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into the trylock
+ * failure case - we won't be sleeping, and we can't
+ * get the lock as it has contention. Just correct the
+ * count and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ /* state is set again under the lock, before re-checking */
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ /* pass the wakeup on so another waiter re-evaluates the count */
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
new file mode 100644
index 000000000000..5d68f3c6990b
--- /dev/null
+++ b/arch/avr32/kernel/setup.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/ioport.h>
+#include <linux/bootmem.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/sysreg.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/init.h>
+
+extern int root_mountflags;
+
+/*
+ * Bootloader-provided information about physical memory
+ */
+struct tag_mem_range *mem_phys;
+struct tag_mem_range *mem_reserved;
+struct tag_mem_range *mem_ramdisk;
+
+/*
+ * Initialize loops_per_jiffy as 5000000 (500MIPS).
+ * Better make it too large than too small...
+ */
+struct avr32_cpuinfo boot_cpu_data = {
+ .loops_per_jiffy = 5000000
+};
+EXPORT_SYMBOL(boot_cpu_data);
+
+static char command_line[COMMAND_LINE_SIZE];
+
+/*
+ * Should be more than enough, but if you have a _really_ complex
+ * setup, you might need to increase the size of this...
+ */
+static struct tag_mem_range __initdata mem_range_cache[32];
+static unsigned mem_range_next_free;
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+ {
+ .name = "Kernel code",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_MEM
+ },
+ {
+ .name = "Kernel data",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+#define kernel_code mem_res[0]
+#define kernel_data mem_res[1]
+
+/*
+ * Early framebuffer allocation. Works as follows:
+ * - If fbmem_size is zero, nothing will be allocated or reserved.
+ * - If fbmem_start is zero when setup_bootmem() is called,
+ * fbmem_size bytes will be allocated from the bootmem allocator.
+ * - If fbmem_start is nonzero, an area of size fbmem_size will be
+ * reserved at the physical address fbmem_start if necessary. If
+ * the area isn't in a memory region known to the kernel, it will
+ * be left alone.
+ *
+ * Board-specific code may use these variables to set up platform data
+ * for the framebuffer driver if fbmem_size is nonzero.
+ */
+static unsigned long __initdata fbmem_start;
+static unsigned long __initdata fbmem_size;
+
+/*
+ * "fbmem=xxx[kKmM]" allocates the specified amount of boot memory for
+ * use as framebuffer.
+ *
+ * "fbmem=xxx[kKmM]@yyy[kKmM]" defines a memory region of size xxx and
+ * starting at yyy to be reserved for use as framebuffer.
+ *
+ * The kernel won't verify that the memory region starting at yyy
+ * actually contains usable RAM.
+ */
+static int __init early_parse_fbmem(char *p)
+{
+	/* Size comes first; memparse() advances p past what it consumed. */
+	fbmem_size = memparse(p, &p);
+
+	/*
+	 * An optional "@start" may follow. Skip the '@' separator before
+	 * parsing: handing memparse() the '@' itself would make it stop
+	 * immediately and always return 0 for the start address.
+	 */
+	if (*p == '@')
+		fbmem_start = memparse(p + 1, &p);
+	return 0;
+}
+early_param("fbmem", early_parse_fbmem);
+
+/*
+ * Publish the physical memory layout in the iomem resource tree.
+ *
+ * Fills in the "Kernel code" and "Kernel data" resources from init_mm,
+ * then registers one "System RAM" resource for every non-empty range on
+ * the mem_phys list. The kernel code/data resources are requested as
+ * children of whichever RAM region fully contains them.
+ */
+static inline void __init resource_init(void)
+{
+ struct tag_mem_range *region;
+
+ /* Code spans start_code..end_code-1, data spans end_code..brk-1. */
+ kernel_code.start = __pa(init_mm.start_code);
+ kernel_code.end = __pa(init_mm.end_code - 1);
+ kernel_data.start = __pa(init_mm.end_code);
+ kernel_data.end = __pa(init_mm.brk - 1);
+
+ for (region = mem_phys; region; region = region->next) {
+ struct resource *res;
+ unsigned long phys_start, phys_end;
+
+ if (region->size == 0)
+ continue;
+
+ /* Resource ranges are inclusive, hence the -1. */
+ phys_start = region->addr;
+ phys_end = phys_start + region->size - 1;
+
+ res = alloc_bootmem_low(sizeof(*res));
+ res->name = "System RAM";
+ res->start = phys_start;
+ res->end = phys_end;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ /* NOTE(review): return values of request_resource() are ignored. */
+ request_resource (&iomem_resource, res);
+
+ if (kernel_code.start >= res->start &&
+ kernel_code.end <= res->end)
+ request_resource (res, &kernel_code);
+ if (kernel_data.start >= res->start &&
+ kernel_data.end <= res->end)
+ request_resource (res, &kernel_data);
+ }
+}
+
+static int __init parse_tag_core(struct tag *tag)
+{
+ if (tag->hdr.size > 2) {
+ if ((tag->u.core.flags & 1) == 0)
+ root_mountflags &= ~MS_RDONLY;
+ ROOT_DEV = new_decode_dev(tag->u.core.rootdev);
+ }
+ return 0;
+}
+__tagtable(ATAG_CORE, parse_tag_core);
+
+/*
+ * Copy the memory range described by @tag into the static
+ * mem_range_cache[] and append the copy to the tail of the list whose
+ * head pointer is @root. Zero-sized ranges are dropped silently; the
+ * kernel panics if the cache is exhausted. Always returns 0.
+ */
+static int __init parse_tag_mem_range(struct tag *tag,
+				      struct tag_mem_range **root)
+{
+	const unsigned cache_slots =
+		sizeof(mem_range_cache) / sizeof(mem_range_cache[0]);
+	struct tag_mem_range **link;
+	struct tag_mem_range *entry;
+
+	/*
+	 * Zero-sized entries carry no information. When running
+	 * standalone, the SDRAM code may emit them if something goes
+	 * wrong, so just skip them.
+	 */
+	if (!tag->u.mem_range.size)
+		return 0;
+
+	if (mem_range_next_free >= cache_slots)
+		panic("Physical memory map too complex!\n");
+
+	/*
+	 * Take a private copy so the bootmem init code doesn't need to
+	 * care about the tag data.
+	 */
+	entry = &mem_range_cache[mem_range_next_free];
+	mem_range_next_free++;
+	*entry = tag->u.mem_range;
+	entry->next = NULL;
+
+	/* Find the terminating link of the list and hook the copy in. */
+	for (link = root; *link != NULL; link = &(*link)->next)
+		;
+	*link = entry;
+
+	return 0;
+}
+
+static int __init parse_tag_mem(struct tag *tag)
+{
+ return parse_tag_mem_range(tag, &mem_phys);
+}
+__tagtable(ATAG_MEM, parse_tag_mem);
+
+static int __init parse_tag_cmdline(struct tag *tag)
+{
+ strlcpy(saved_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE);
+ return 0;
+}
+__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
+
+static int __init parse_tag_rdimg(struct tag *tag)
+{
+ return parse_tag_mem_range(tag, &mem_ramdisk);
+}
+__tagtable(ATAG_RDIMG, parse_tag_rdimg);
+
+static int __init parse_tag_clock(struct tag *tag)
+{
+ /*
+ * We'll figure out the clocks by peeking at the system
+ * manager regs directly.
+ */
+ return 0;
+}
+__tagtable(ATAG_CLOCK, parse_tag_clock);
+
+static int __init parse_tag_rsvd_mem(struct tag *tag)
+{
+ return parse_tag_mem_range(tag, &mem_reserved);
+}
+__tagtable(ATAG_RSVD_MEM, parse_tag_rsvd_mem);
+
+static int __init parse_tag_ethernet(struct tag *tag)
+{
+#if 0
+ const struct platform_device *pdev;
+
+ /*
+ * We really need a bus type that supports "classes"...this
+ * will do for now (until we must handle other kinds of
+ * ethernet controllers)
+ */
+ pdev = platform_get_device("macb", tag->u.ethernet.mac_index);
+ if (pdev && pdev->dev.platform_data) {
+ struct eth_platform_data *data = pdev->dev.platform_data;
+
+ data->valid = 1;
+ data->mii_phy_addr = tag->u.ethernet.mii_phy_addr;
+ memcpy(data->hw_addr, tag->u.ethernet.hw_address,
+ sizeof(data->hw_addr));
+ }
+#endif
+ return 0;
+}
+__tagtable(ATAG_ETHERNET, parse_tag_ethernet);
+
+/*
+ * Look up @tag in the tag table and run the matching parse function.
+ * The table lives between __tagtable_begin and __tagtable_end and is
+ * assembled by the linker from all the __tagtable declarations.
+ *
+ * Returns 1 if a handler was found (its own return value is ignored),
+ * 0 if the tag is unknown.
+ */
+static int __init parse_tag(struct tag *tag)
+{
+	extern struct tagtable __tagtable_begin, __tagtable_end;
+	struct tagtable *entry = &__tagtable_begin;
+
+	while (entry < &__tagtable_end) {
+		if (entry->tag == tag->hdr.tag) {
+			entry->parse(tag);
+			return 1;
+		}
+		entry++;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the boot loader's tag list starting at @t, stopping at the
+ * ATAG_NONE terminator, and warn about every tag nobody handles.
+ */
+static void __init parse_tags(struct tag *t)
+{
+	while (t->hdr.tag != ATAG_NONE) {
+		if (!parse_tag(t))
+			printk(KERN_WARNING
+			       "Ignoring unrecognised tag 0x%08x\n",
+			       t->hdr.tag);
+		t = tag_next(t);
+	}
+}
+
+/*
+ * Architecture-specific boot-time setup.
+ *
+ * Parses the boot loader tags, initializes processor and platform,
+ * records the CPU clock, describes the kernel image in init_mm, hands
+ * the command line back through @cmdline_p, and then brings up bootmem,
+ * the framebuffer reservation, paging and the resource tree, in that
+ * order.
+ */
+void __init setup_arch (char **cmdline_p)
+{
+ struct clk *cpu_clk;
+
+ /* Tags may fill in saved_command_line and the mem_* lists used below. */
+ parse_tags(bootloader_tags);
+
+ setup_processor();
+ setup_platform();
+
+ cpu_clk = clk_get(NULL, "cpu");
+ if (IS_ERR(cpu_clk)) {
+ /* Non-fatal: boot_cpu_data keeps its static loops_per_jiffy default. */
+ printk(KERN_WARNING "Warning: Unable to get CPU clock\n");
+ } else {
+ unsigned long cpu_hz = clk_get_rate(cpu_clk);
+
+ /*
+ * Well, duh, but it's probably a good idea to
+ * increment the use count.
+ */
+ clk_enable(cpu_clk);
+
+ boot_cpu_data.clk = cpu_clk;
+ boot_cpu_data.loops_per_jiffy = cpu_hz * 4;
+ /* Print the rate rounded to the nearest kHz as MHz with three decimals. */
+ printk("CPU: Running at %lu.%03lu MHz\n",
+ ((cpu_hz + 500) / 1000) / 1000,
+ ((cpu_hz + 500) / 1000) % 1000);
+ }
+
+ /* Kernel image boundaries come from the linker-provided symbols. */
+ init_mm.start_code = (unsigned long) &_text;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+ init_mm.brk = (unsigned long) &_end;
+
+ /* Work on a private copy; early params (e.g. "fbmem") are parsed here. */
+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
+ parse_early_param();
+
+ setup_bootmem();
+
+ board_setup_fbmem(fbmem_start, fbmem_size);
+
+#ifdef CONFIG_VT
+ conswitchp = &dummy_con;
+#endif
+
+ paging_init();
+
+ /* Uses init_mm and mem_phys set up above, and allocates from bootmem. */
+ resource_init();
+}
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
new file mode 100644
index 000000000000..33096651c24f
--- /dev/null
+++ b/arch/avr32/kernel/signal.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/sh/kernel/signal.c
+ * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ struct pt_regs *regs)
+{
+ return do_sigaltstack(uss, uoss, regs->sp);
+}
+
+struct rt_sigframe
+{
+ struct siginfo info;
+ struct ucontext uc;
+ unsigned long retcode;
+};
+
+/*
+ * Reload the register set in @regs from the user-space sigcontext @sc.
+ *
+ * Returns 0 on success and non-zero if any user access faulted or the
+ * resulting registers fail valid_user_regs(). The caller is expected to
+ * have validated the user range already, since only the unchecked
+ * __get_user() is used here.
+ */
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+ int err = 0;
+
+/* Fetch one identically named field from user space, accumulating errors. */
+#define COPY(x) err |= __get_user(regs->x, &sc->x)
+ COPY(sr);
+ COPY(pc);
+ COPY(lr);
+ COPY(sp);
+ COPY(r12);
+ COPY(r11);
+ COPY(r10);
+ COPY(r9);
+ COPY(r8);
+ COPY(r7);
+ COPY(r6);
+ COPY(r5);
+ COPY(r4);
+ COPY(r3);
+ COPY(r2);
+ COPY(r1);
+ COPY(r0);
+#undef COPY
+
+ /*
+ * Don't allow anyone to pretend they're running in supervisor
+ * mode or something...
+ */
+ err |= !valid_user_regs(regs);
+
+ return err;
+}
+
+
+/*
+ * Return from a signal handler: restore the blocked-signal mask and
+ * the register context from the rt_sigframe found at the user stack
+ * pointer.
+ *
+ * Returns the restored r12 on success. If the frame cannot be read or
+ * the restored context is invalid, SIGSEGV is forced on the current
+ * task and 0 is returned.
+ */
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ sigset_t set;
+
+ frame = (struct rt_sigframe __user *)regs->sp;
+ pr_debug("SIG return: frame = %p\n", frame);
+
+ /* One range check up front covers the unchecked accesses below. */
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ /* SIGKILL and SIGSTOP can never be blocked, whatever the frame says. */
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ goto badframe;
+
+ pr_debug("Context restored: pc = %08lx, lr = %08lx, sp = %08lx\n",
+ regs->pc, regs->lr, regs->sp);
+
+ /* Hand back the restored r12 as the return value. */
+ return regs->r12;
+
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+static int
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
+{
+ int err = 0;
+
+#define COPY(x) err |= __put_user(regs->x, &sc->x)
+ COPY(sr);
+ COPY(pc);
+ COPY(lr);
+ COPY(sp);
+ COPY(r12);
+ COPY(r11);
+ COPY(r10);
+ COPY(r9);
+ COPY(r8);
+ COPY(r7);
+ COPY(r6);
+ COPY(r5);
+ COPY(r4);
+ COPY(r3);
+ COPY(r2);
+ COPY(r1);
+ COPY(r0);
+#undef COPY
+
+ return err;
+}
+
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize)
+{
+ unsigned long sp = regs->sp;
+
+ if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
+ sp = current->sas_ss_sp + current->sas_ss_size;
+
+ return (void __user *)((sp - framesize) & ~3);
+}
+
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+ err = -EFAULT;
+ if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+ goto out;
+
+ /*
+ * Set up the return code:
+ *
+ * mov r8, __NR_rt_sigreturn
+ * scall
+ *
+ * Note: This will blow up since we're using a non-executable
+ * stack. Better use SA_RESTORER.
+ */
+#if __NR_rt_sigreturn > 127
+# error __NR_rt_sigreturn must be < 127 to fit in a short mov
+#endif
+ err = __put_user(0x3008d733 | (__NR_rt_sigreturn << 20),
+ &frame->retcode);
+
+ err |= copy_siginfo_to_user(&frame->info, info);
+
+ /* Set up the ucontext */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(NULL, &frame->uc.uc_link);
+ err |= __put_user((void __user *)current->sas_ss_sp,
+ &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size,
+ &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ if (err)
+ goto out;
+
+ regs->r12 = sig;
+ regs->r11 = (unsigned long) &frame->info;
+ regs->r10 = (unsigned long) &frame->uc;
+ regs->sp = (unsigned long) frame;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ regs->lr = (unsigned long)ka->sa.sa_restorer;
+ else {
+ printk(KERN_NOTICE "[%s:%d] did not set SA_RESTORER\n",
+ current->comm, current->pid);
+ regs->lr = (unsigned long) &frame->retcode;
+ }
+
+ pr_debug("SIG deliver [%s:%d]: sig=%d sp=0x%lx pc=0x%lx->0x%p lr=0x%lx\n",
+ current->comm, current->pid, sig, regs->sp,
+ regs->pc, ka->sa.sa_handler, regs->lr);
+
+ regs->pc = (unsigned long) ka->sa.sa_handler;
+
+out:
+ return err;
+}
+
+static inline void restart_syscall(struct pt_regs *regs)
+{
+ if (regs->r12 == -ERESTART_RESTARTBLOCK)
+ regs->r8 = __NR_restart_syscall;
+ else
+ regs->r12 = regs->r12_orig;
+ regs->pc -= 2;
+}
+
+/*
+ * Deliver @sig to the current task: build the signal frame, update the
+ * blocked mask, and force a SIGSEGV if the frame could not be set up or
+ * the resulting registers are not valid user registers.
+ *
+ * NOTE(review): @syscall is accepted but not used in this function.
+ */
+static inline void
+handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *oldset, struct pt_regs *regs, int syscall)
+{
+ int ret;
+
+ /*
+ * Set up the stack frame
+ */
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+ /*
+ * Check that the resulting registers are sane
+ */
+ ret |= !valid_user_regs(regs);
+
+ /*
+ * Block the handler's sa_mask and the signal itself while the
+ * handler runs, unless SA_NODEFER was requested. On failure the
+ * signals are blocked regardless of SA_NODEFER.
+ */
+ if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked,
+ &ka->sa.sa_mask);
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ }
+
+ if (ret == 0)
+ return;
+
+ /* Frame setup failed or produced invalid registers. */
+ force_sigsegv(sig, current);
+}
+
+/*
+ * Fetch the next pending signal for the current task and deliver it,
+ * handling system call restart along the way.
+ *
+ * @regs:    user register state at the time of the interruption
+ * @oldset:  mask to save in the signal frame; replaced by the saved
+ *           sigmask when TIF_RESTORE_SIGMASK is set, or by the current
+ *           blocked mask when NULL
+ * @syscall: non-zero if a system call was interrupted, enabling the
+ *           restart logic keyed on the value in r12
+ *
+ * Returns 1 if a signal was handed to handle_signal(), 0 otherwise.
+ *
+ * Note that 'init' is a special process: it doesn't get signals it
+ * doesn't want to handle. Thus you cannot kill init even with a
+ * SIGKILL even by mistake.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
+{
+ siginfo_t info;
+ int signr;
+ struct k_sigaction ka;
+
+ /*
+ * We want the common case to go fast, which is why we may in
+ * certain cases get here from kernel mode. Just return
+ * without doing anything if so.
+ */
+ if (!user_mode(regs))
+ return 0;
+
+ /* After a freeze, skip signal lookup if nothing is pending any more. */
+ if (try_to_freeze()) {
+ signr = 0;
+ if (!signal_pending(current))
+ goto no_signal;
+ }
+
+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
+ oldset = &current->saved_sigmask;
+ else if (!oldset)
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+no_signal:
+ /*
+ * Decide between restarting the interrupted system call and
+ * failing it with -EINTR. ka is only examined when signr > 0,
+ * i.e. when get_signal_to_deliver() returned a signal.
+ */
+ if (syscall) {
+ switch (regs->r12) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ if (signr > 0) {
+ regs->r12 = -EINTR;
+ break;
+ }
+ /* fall through */
+ case -ERESTARTSYS:
+ if (signr > 0 && !(ka.sa.sa_flags & SA_RESTART)) {
+ regs->r12 = -EINTR;
+ break;
+ }
+ /* fall through */
+ case -ERESTARTNOINTR:
+ restart_syscall(regs);
+ }
+ }
+
+ if (signr == 0) {
+ /* No signal to deliver -- put the saved sigmask back */
+ if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+ }
+ return 0;
+ }
+
+ handle_signal(signr, &ka, &info, oldset, regs, syscall);
+ return 1;
+}
+
+asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
+{
+ int syscall = 0;
+
+ if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
+ syscall = 1;
+
+ if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
+ do_signal(regs, &current->blocked, syscall);
+}
diff --git a/arch/avr32/kernel/switch_to.S b/arch/avr32/kernel/switch_to.S
new file mode 100644
index 000000000000..a48d046723c5
--- /dev/null
+++ b/arch/avr32/kernel/switch_to.S
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/sysreg.h>
+
+ .text
+ .global __switch_to
+ .type __switch_to, @function
+
+ /* Switch thread context from "prev" to "next", returning "last"
+ * r12 : prev
+ * r11 : &prev->thread + 1
+ * r10 : &next->thread
+ */
+__switch_to:
+ stm --r11, r0,r1,r2,r3,r4,r5,r6,r7,sp,lr
+ mfsr r9, SYSREG_SR
+ st.w --r11, r9
+ ld.w r8, r10++
+ /*
+ * schedule() may have been called from a mode with a different
+ * set of registers. Make sure we don't lose anything here.
+ */
+ pushm r10,r12
+ mtsr SYSREG_SR, r8
+ frs /* flush the return stack */
+ sub pc, -2 /* flush the pipeline */
+ popm r10,r12
+ ldm r10++, r0,r1,r2,r3,r4,r5,r6,r7,sp,pc
+ .size __switch_to, . - __switch_to
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
new file mode 100644
index 000000000000..6ec5693da448
--- /dev/null
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+
+#include <asm/mman.h>
+#include <asm/uaccess.h>
+
+/*
+ * pipe(2): create a pipe and store its two descriptors at @filedes.
+ *
+ * Returns 0 on success, the error from do_pipe() if pipe creation
+ * fails, or -EFAULT if the descriptors cannot be copied to user space.
+ */
+asmlinkage int sys_pipe(unsigned long __user *filedes)
+{
+	int pipe_fds[2];
+	int ret = do_pipe(pipe_fds);
+
+	if (ret)
+		return ret;
+
+	if (copy_to_user(filedes, pipe_fds, sizeof(pipe_fds)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * mmap2(2): map @len bytes at (or near) @addr, backed by the file open
+ * on @fd unless MAP_ANONYMOUS is set. @offset is passed through to
+ * do_mmap_pgoff() unchanged.
+ *
+ * Returns whatever do_mmap_pgoff() returns, or -EBADF if @fd does not
+ * refer to an open file.
+ */
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, off_t offset)
+{
+	struct file *backing = NULL;
+	int result;
+
+	/* These two flags are masked out before the request is processed. */
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	if ((flags & MAP_ANONYMOUS) == 0) {
+		backing = fget(fd);
+		if (backing == NULL)
+			return -EBADF;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	result = do_mmap_pgoff(backing, addr, len, prot, flags, offset);
+	up_write(&current->mm->mmap_sem);
+
+	/* Drop the reference taken by fget(). */
+	if (backing != NULL)
+		fput(backing);
+
+	return result;
+}
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
new file mode 100644
index 000000000000..7589a9b426cb
--- /dev/null
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Stubs for syscalls that require access to pt_regs or that take more
+ * than five parameters.
+ */
+
+#define ARG6 r3
+
+ .text
+ .global __sys_rt_sigsuspend
+ .type __sys_rt_sigsuspend,@function
+__sys_rt_sigsuspend:
+ mov r10, sp
+ rjmp sys_rt_sigsuspend
+
+ .global __sys_sigaltstack
+ .type __sys_sigaltstack,@function
+__sys_sigaltstack:
+ mov r10, sp
+ rjmp sys_sigaltstack
+
+ .global __sys_rt_sigreturn
+ .type __sys_rt_sigreturn,@function
+__sys_rt_sigreturn:
+ mov r12, sp
+ rjmp sys_rt_sigreturn
+
+ .global __sys_fork
+ .type __sys_fork,@function
+__sys_fork:
+ mov r12, sp
+ rjmp sys_fork
+
+ .global __sys_clone
+ .type __sys_clone,@function
+__sys_clone:
+ mov r8, sp
+ rjmp sys_clone
+
+ .global __sys_vfork
+ .type __sys_vfork,@function
+__sys_vfork:
+ mov r12, sp
+ rjmp sys_vfork
+
+ .global __sys_execve
+ .type __sys_execve,@function
+__sys_execve:
+ mov r9, sp
+ rjmp sys_execve
+
+ .global __sys_mmap2
+ .type __sys_mmap2,@function
+__sys_mmap2:
+ pushm lr
+ st.w --sp, ARG6
+ rcall sys_mmap2
+ sub sp, -4
+ popm pc
+
+ .global __sys_sendto
+ .type __sys_sendto,@function
+__sys_sendto:
+ pushm lr
+ st.w --sp, ARG6
+ rcall sys_sendto
+ sub sp, -4
+ popm pc
+
+ .global __sys_recvfrom
+ .type __sys_recvfrom,@function
+__sys_recvfrom:
+ pushm lr
+ st.w --sp, ARG6
+ rcall sys_recvfrom
+ sub sp, -4
+ popm pc
+
+ .global __sys_pselect6
+ .type __sys_pselect6,@function
+__sys_pselect6:
+ pushm lr
+ st.w --sp, ARG6
+ rcall sys_pselect6
+ sub sp, -4
+ popm pc
+
+ .global __sys_splice
+ .type __sys_splice,@function
+__sys_splice:
+ pushm lr
+ st.w --sp, ARG6
+ rcall sys_splice
+ sub sp, -4
+ popm pc
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
new file mode 100644
index 000000000000..63b206965d05
--- /dev/null
+++ b/arch/avr32/kernel/syscall_table.S
@@ -0,0 +1,289 @@
+/*
+ * AVR32 system call table
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(CONFIG_NFSD) && !defined(CONFIG_NFSD_MODULE)
+#define sys_nfsservctl sys_ni_syscall
+#endif
+
+#if !defined(CONFIG_SYSV_IPC)
+# define sys_ipc sys_ni_syscall
+#endif
+
+ .section .rodata,"a",@progbits
+ .type sys_call_table,@object
+ .global sys_call_table
+ .align 2
+sys_call_table:
+ .long sys_restart_syscall
+ .long sys_exit
+ .long __sys_fork
+ .long sys_read
+ .long sys_write
+ .long sys_open /* 5 */
+ .long sys_close
+ .long sys_umask
+ .long sys_creat
+ .long sys_link
+ .long sys_unlink /* 10 */
+ .long __sys_execve
+ .long sys_chdir
+ .long sys_time
+ .long sys_mknod
+ .long sys_chmod /* 15 */
+ .long sys_chown
+ .long sys_lchown
+ .long sys_lseek
+ .long sys_llseek
+ .long sys_getpid /* 20 */
+ .long sys_mount
+ .long sys_umount
+ .long sys_setuid
+ .long sys_getuid
+ .long sys_stime /* 25 */
+ .long sys_ptrace
+ .long sys_alarm
+ .long sys_pause
+ .long sys_utime
+ .long sys_newstat /* 30 */
+ .long sys_newfstat
+ .long sys_newlstat
+ .long sys_access
+ .long sys_chroot
+ .long sys_sync /* 35 */
+ .long sys_fsync
+ .long sys_kill
+ .long sys_rename
+ .long sys_mkdir
+ .long sys_rmdir /* 40 */
+ .long sys_dup
+ .long sys_pipe
+ .long sys_times
+ .long __sys_clone
+ .long sys_brk /* 45 */
+ .long sys_setgid
+ .long sys_getgid
+ .long sys_getcwd
+ .long sys_geteuid
+ .long sys_getegid /* 50 */
+ .long sys_acct
+ .long sys_setfsuid
+ .long sys_setfsgid
+ .long sys_ioctl
+ .long sys_fcntl /* 55 */
+ .long sys_setpgid
+ .long sys_mremap
+ .long sys_setresuid
+ .long sys_getresuid
+ .long sys_setreuid /* 60 */
+ .long sys_setregid
+ .long sys_ustat
+ .long sys_dup2
+ .long sys_getppid
+ .long sys_getpgrp /* 65 */
+ .long sys_setsid
+ .long sys_rt_sigaction
+ .long __sys_rt_sigreturn
+ .long sys_rt_sigprocmask
+ .long sys_rt_sigpending /* 70 */
+ .long sys_rt_sigtimedwait
+ .long sys_rt_sigqueueinfo
+ .long __sys_rt_sigsuspend
+ .long sys_sethostname
+ .long sys_setrlimit /* 75 */
+ .long sys_getrlimit
+ .long sys_getrusage
+ .long sys_gettimeofday
+ .long sys_settimeofday
+ .long sys_getgroups /* 80 */
+ .long sys_setgroups
+ .long sys_select
+ .long sys_symlink
+ .long sys_fchdir
+ .long sys_readlink /* 85 */
+ .long sys_pread64
+ .long sys_pwrite64
+ .long sys_swapon
+ .long sys_reboot
+ .long __sys_mmap2 /* 90 */
+ .long sys_munmap
+ .long sys_truncate
+ .long sys_ftruncate
+ .long sys_fchmod
+ .long sys_fchown /* 95 */
+ .long sys_getpriority
+ .long sys_setpriority
+ .long sys_wait4
+ .long sys_statfs
+ .long sys_fstatfs /* 100 */
+ .long sys_vhangup
+ .long __sys_sigaltstack
+ .long sys_syslog
+ .long sys_setitimer
+ .long sys_getitimer /* 105 */
+ .long sys_swapoff
+ .long sys_sysinfo
+ .long sys_ipc
+ .long sys_sendfile
+ .long sys_setdomainname /* 110 */
+ .long sys_newuname
+ .long sys_adjtimex
+ .long sys_mprotect
+ .long __sys_vfork
+ .long sys_init_module /* 115 */
+ .long sys_delete_module
+ .long sys_quotactl
+ .long sys_getpgid
+ .long sys_bdflush
+ .long sys_sysfs /* 120 */
+ .long sys_personality
+ .long sys_ni_syscall /* reserved for afs_syscall */
+ .long sys_getdents
+ .long sys_flock
+ .long sys_msync /* 125 */
+ .long sys_readv
+ .long sys_writev
+ .long sys_getsid
+ .long sys_fdatasync
+ .long sys_sysctl /* 130 */
+ .long sys_mlock
+ .long sys_munlock
+ .long sys_mlockall
+ .long sys_munlockall
+ .long sys_sched_setparam /* 135 */
+ .long sys_sched_getparam
+ .long sys_sched_setscheduler
+ .long sys_sched_getscheduler
+ .long sys_sched_yield
+ .long sys_sched_get_priority_max /* 140 */
+ .long sys_sched_get_priority_min
+ .long sys_sched_rr_get_interval
+ .long sys_nanosleep
+ .long sys_poll
+ .long sys_nfsservctl /* 145 */
+ .long sys_setresgid
+ .long sys_getresgid
+ .long sys_prctl
+ .long sys_socket
+ .long sys_bind /* 150 */
+ .long sys_connect
+ .long sys_listen
+ .long sys_accept
+ .long sys_getsockname
+ .long sys_getpeername /* 155 */
+ .long sys_socketpair
+ .long sys_send
+ .long sys_recv
+ .long __sys_sendto
+ .long __sys_recvfrom /* 160 */
+ .long sys_shutdown
+ .long sys_setsockopt
+ .long sys_getsockopt
+ .long sys_sendmsg
+ .long sys_recvmsg /* 165 */
+ .long sys_truncate64
+ .long sys_ftruncate64
+ .long sys_stat64
+ .long sys_lstat64
+ .long sys_fstat64 /* 170 */
+ .long sys_pivot_root
+ .long sys_mincore
+ .long sys_madvise
+ .long sys_getdents64
+ .long sys_fcntl64 /* 175 */
+ .long sys_gettid
+ .long sys_readahead
+ .long sys_setxattr
+ .long sys_lsetxattr
+ .long sys_fsetxattr /* 180 */
+ .long sys_getxattr
+ .long sys_lgetxattr
+ .long sys_fgetxattr
+ .long sys_listxattr
+ .long sys_llistxattr /* 185 */
+ .long sys_flistxattr
+ .long sys_removexattr
+ .long sys_lremovexattr
+ .long sys_fremovexattr
+ .long sys_tkill /* 190 */
+ .long sys_sendfile64
+ .long sys_futex
+ .long sys_sched_setaffinity
+ .long sys_sched_getaffinity
+ .long sys_capget /* 195 */
+ .long sys_capset
+ .long sys_io_setup
+ .long sys_io_destroy
+ .long sys_io_getevents
+ .long sys_io_submit /* 200 */
+ .long sys_io_cancel
+ .long sys_fadvise64
+ .long sys_exit_group
+ .long sys_lookup_dcookie
+ .long sys_epoll_create /* 205 */
+ .long sys_epoll_ctl
+ .long sys_epoll_wait
+ .long sys_remap_file_pages
+ .long sys_set_tid_address
+ .long sys_timer_create /* 210 */
+ .long sys_timer_settime
+ .long sys_timer_gettime
+ .long sys_timer_getoverrun
+ .long sys_timer_delete
+ .long sys_clock_settime /* 215 */
+ .long sys_clock_gettime
+ .long sys_clock_getres
+ .long sys_clock_nanosleep
+ .long sys_statfs64
+ .long sys_fstatfs64 /* 220 */
+ .long sys_tgkill
+ .long sys_ni_syscall /* reserved for TUX */
+ .long sys_utimes
+ .long sys_fadvise64_64
+ .long sys_cacheflush /* 225 */
+ .long sys_ni_syscall /* sys_vserver */
+ .long sys_mq_open
+ .long sys_mq_unlink
+ .long sys_mq_timedsend
+ .long sys_mq_timedreceive /* 230 */
+ .long sys_mq_notify
+ .long sys_mq_getsetattr
+ .long sys_kexec_load
+ .long sys_waitid
+ .long sys_add_key /* 235 */
+ .long sys_request_key
+ .long sys_keyctl
+ .long sys_ioprio_set
+ .long sys_ioprio_get
+ .long sys_inotify_init /* 240 */
+ .long sys_inotify_add_watch
+ .long sys_inotify_rm_watch
+ .long sys_openat
+ .long sys_mkdirat
+ .long sys_mknodat /* 245 */
+ .long sys_fchownat
+ .long sys_futimesat
+ .long sys_fstatat64
+ .long sys_unlinkat
+ .long sys_renameat /* 250 */
+ .long sys_linkat
+ .long sys_symlinkat
+ .long sys_readlinkat
+ .long sys_fchmodat
+ .long sys_faccessat /* 255 */
+ .long __sys_pselect6
+ .long sys_ppoll
+ .long sys_unshare
+ .long sys_set_robust_list
+ .long sys_get_robust_list /* 260 */
+ .long __sys_splice
+ .long sys_sync_file_range
+ .long sys_tee
+ .long sys_vmsplice
+ .long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
new file mode 100644
index 000000000000..b0e6b5855a38
--- /dev/null
+++ b/arch/avr32/kernel/time.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on MIPS implementation arch/mips/kernel/time.c
+ * Copyright 2001 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/clocksource.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/sysdev.h>
+
+#include <asm/div64.h>
+#include <asm/sysreg.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+
+static cycle_t read_cycle_count(void)
+{
+ return (cycle_t)sysreg_read(COUNT);
+}
+
+static struct clocksource clocksource_avr32 = {
+ .name = "avr32",
+ .rating = 350,
+ .read = read_cycle_count,
+ .mask = CLOCKSOURCE_MASK(32),
+ .shift = 16,
+ .is_continuous = 1,
+};
+
+/*
+ * By default we provide the null RTC ops
+ */
+static unsigned long null_rtc_get_time(void)
+{
+ return mktime(2004, 1, 1, 0, 0, 0);
+}
+
+static int null_rtc_set_time(unsigned long sec)
+{
+ return 0;
+}
+
+static unsigned long (*rtc_get_time)(void) = null_rtc_get_time;
+static int (*rtc_set_time)(unsigned long) = null_rtc_set_time;
+
+/* how many counter cycles in a jiffy? */
+static unsigned long cycles_per_jiffy;
+
+/* cycle counter value at the previous timer interrupt */
+static unsigned int timerhi, timerlo;
+
+/* the count value for the next timer interrupt */
+static unsigned int expirelo;
+
+/*
+ * Acknowledge the current timer interrupt by programming COMPARE with
+ * the next expiry, one jiffy's worth of cycles after the previous one.
+ * If COUNT has already reached or passed that expiry, reschedule the
+ * next interrupt relative to the current COUNT instead.
+ */
+static void avr32_timer_ack(void)
+{
+ unsigned int count;
+
+ /* Ack this timer interrupt and set the next one */
+ expirelo += cycles_per_jiffy;
+ if (expirelo == 0) {
+ /*
+ * NOTE(review): a COMPARE value of 0 is avoided here, presumably
+ * because it would disable the compare interrupt -- confirm
+ * against the AVR32 architecture manual.
+ */
+ printk(KERN_DEBUG "expirelo == 0\n");
+ sysreg_write(COMPARE, expirelo + 1);
+ } else {
+ sysreg_write(COMPARE, expirelo);
+ }
+
+ /* Check to see if we have missed any timer interrupts */
+ count = sysreg_read(COUNT);
+ /* Unsigned wrap-around compare: true when count is at or past expirelo. */
+ if ((count - expirelo) < 0x7fffffff) {
+ expirelo = count + cycles_per_jiffy;
+ sysreg_write(COMPARE, expirelo);
+ }
+}
+
+static unsigned int avr32_hpt_read(void)
+{
+ return sysreg_read(COUNT);
+}
+
+/*
+ * Taken from MIPS c0_hpt_timer_init().
+ *
+ * Why is it so complicated, and what is "count"? My assumption is
+ * that `count' specifies the "reference cycle", i.e. the cycle since
+ * reset that should mean "zero". The reason COUNT is written twice is
+ * probably to make sure we don't get any timer interrupts while we
+ * are messing with the counter.
+ */
+static void avr32_hpt_init(unsigned int count)
+{
+ count = sysreg_read(COUNT) - count;
+ expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy;
+ sysreg_write(COUNT, expirelo - cycles_per_jiffy);
+ sysreg_write(COMPARE, expirelo);
+ sysreg_write(COUNT, count);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+ /* There must be better ways...? */
+ return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
+/*
+ * local_timer_interrupt() does profiling and process accounting on a
+ * per-CPU basis.
+ *
+ * In UP mode, it is invoked from the (global) timer_interrupt.
+ */
+static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ if (current->pid)
+ profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(regs));
+}
+
+static irqreturn_t
+timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned int count;
+
+ /* ack timer interrupt and try to set next interrupt */
+ count = avr32_hpt_read();
+ avr32_timer_ack();
+
+ /* Update timerhi/timerlo for intra-jiffy calibration */
+ timerhi += count < timerlo; /* Wrap around */
+ timerlo = count;
+
+ /*
+ * Call the generic timer interrupt handler
+ */
+ write_seqlock(&xtime_lock);
+ do_timer(regs);
+ write_sequnlock(&xtime_lock);
+
+ /*
+ * In UP mode, we call local_timer_interrupt() to do profiling
+ * and process accounting.
+ *
+ * SMP is not supported yet.
+ */
+ local_timer_interrupt(irq, dev_id, regs);
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction timer_irqaction = {
+ .handler = timer_interrupt,
+ .flags = IRQF_DISABLED,
+ .name = "timer",
+};
+
+/*
+ * Boot-time timekeeping setup.
+ *
+ * Seeds xtime from the RTC hook, derives the clocksource multiplier
+ * and the number of counter cycles per jiffy from the CPU clock rate,
+ * programs the first COMPARE expiry, and registers the clocksource and
+ * the timer interrupt. Registration failures are logged but are not
+ * fatal.
+ */
+void __init time_init(void)
+{
+	unsigned long mult, shift, count_hz;
+	int ret;
+
+	xtime.tv_sec = rtc_get_time();
+	xtime.tv_nsec = 0;
+
+	set_normalized_timespec(&wall_to_monotonic,
+				-xtime.tv_sec, -xtime.tv_nsec);
+
+	printk("Before time_init: count=%08lx, compare=%08lx\n",
+	       (unsigned long)sysreg_read(COUNT),
+	       (unsigned long)sysreg_read(COMPARE));
+
+	count_hz = clk_get_rate(boot_cpu_data.clk);
+	shift = clocksource_avr32.shift;
+	mult = clocksource_hz2mult(count_hz, shift);
+	clocksource_avr32.mult = mult;
+
+	printk("Cycle counter: mult=%lu, shift=%lu\n", mult, shift);
+
+	{
+		u64 tmp;
+
+		/*
+		 * cycles_per_jiffy = (TICK_NSEC << shift) / mult, rounded
+		 * to nearest by adding half the divisor first.
+		 */
+		tmp = TICK_NSEC;
+		tmp <<= shift;
+		tmp += mult / 2;
+		do_div(tmp, mult);
+
+		cycles_per_jiffy = tmp;
+	}
+
+	/* This sets up the high precision timer for the first interrupt. */
+	avr32_hpt_init(avr32_hpt_read());
+
+	printk("After time_init: count=%08lx, compare=%08lx\n",
+	       (unsigned long)sysreg_read(COUNT),
+	       (unsigned long)sysreg_read(COMPARE));
+
+	ret = clocksource_register(&clocksource_avr32);
+	if (ret)
+		printk(KERN_ERR
+		       "timer: could not register clocksource: %d\n", ret);
+
+	/* Log at KERN_ERR as well, matching the clocksource failure above. */
+	ret = setup_irq(0, &timer_irqaction);
+	if (ret)
+		printk(KERN_ERR
+		       "timer: could not request IRQ 0: %d\n", ret);
+}
+
+static struct sysdev_class timer_class = {
+ set_kset_name("timer"),
+};
+
+static struct sys_device timer_device = {
+ .id = 0,
+ .cls = &timer_class,
+};
+
+static int __init init_timer_sysfs(void)
+{
+ int err = sysdev_class_register(&timer_class);
+ if (!err)
+ err = sysdev_register(&timer_device);
+ return err;
+}
+
+device_initcall(init_timer_sysfs);
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
new file mode 100644
index 000000000000..7e803f4d7a12
--- /dev/null
+++ b/arch/avr32/kernel/traps.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+
+#include <asm/traps.h>
+#include <asm/sysreg.h>
+#include <asm/addrspace.h>
+#include <asm/ocd.h>
+#include <asm/mmu_context.h>
+#include <asm/uaccess.h>
+
+/*
+ * Print the 32-bit words in [bottom, top) as hex, eight words per row.
+ *
+ * Rows start on a 32-byte boundary and are prefixed with the low 16
+ * bits of the row address; slots that fall outside the requested range
+ * are padded. The dump stops at the first word that cannot be read.
+ */
+static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
+{
+	unsigned long addr = bottom & ~31;
+	int col;
+
+	printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top);
+
+	while (addr < top) {
+		printk("%04lx: ", addr & 0xffff);
+
+		for (col = 0; col < 8; col++, addr += 4) {
+			unsigned int word;
+
+			/* Outside the requested range: just pad */
+			if (addr < bottom || addr >= top) {
+				printk(" ");
+				continue;
+			}
+
+			/* Unreadable memory: terminate the row and give up */
+			if (__get_user(word, (unsigned int __user *)addr)) {
+				printk("\n");
+				return;
+			}
+
+			printk("%08x ", word);
+		}
+		printk("\n");
+	}
+}
+
+#ifdef CONFIG_FRAME_POINTER
+/*
+ * Print a backtrace by following the frame pointer chain.
+ *
+ * The starting frame pointer is taken from @regs if given, from the
+ * live r7 register if @tsk is the current task, and from the saved CPU
+ * context of @tsk otherwise. Each frame is read as two words: the
+ * return address at fp[0] and the previous frame pointer at fp[1].
+ */
+static inline void __show_trace(struct task_struct *tsk, unsigned long *sp,
+				struct pt_regs *regs)
+{
+	unsigned long __user *frame;
+	unsigned long __user *prev_frame = NULL;
+
+	if (regs) {
+		frame = (unsigned long __user *)regs->r7;
+	} else if (tsk == current) {
+		register unsigned long __user *real_fp __asm__("r7");
+		frame = real_fp;
+	} else {
+		frame = (unsigned long __user *)tsk->thread.cpu_context.r7;
+	}
+
+	/*
+	 * Stop when a read faults, when the frame pointer becomes zero,
+	 * or when it no longer changes from one frame to the next.
+	 */
+	while (frame && frame != prev_frame) {
+		unsigned long ret_addr, next_frame = 0;
+
+		prev_frame = frame;
+		if (__get_user(ret_addr, frame) ||
+		    __get_user(next_frame, frame + 1))
+			break;
+		frame = (unsigned long __user *)next_frame;
+
+		printk(" [<%08lx>] ", ret_addr);
+		print_symbol("%s\n", ret_addr);
+	}
+	printk("\n");
+}
+#else
+/*
+ * Without frame pointers, scan every word from @sp to the end of the
+ * kernel stack and print those that look like kernel text addresses.
+ */
+static inline void __show_trace(struct task_struct *tsk, unsigned long *sp,
+				struct pt_regs *regs)
+{
+	for (; !kstack_end(sp); sp++) {
+		unsigned long candidate = *sp;
+
+		if (!kernel_text_address(candidate))
+			continue;
+
+		printk(" [<%08lx>] ", candidate);
+		print_symbol("%s\n", candidate);
+	}
+}
+#endif
+
+/*
+ * Print a "Call trace:" header followed by the backtrace for @tsk.
+ *
+ * Nothing is printed when @regs is given and its saved status register
+ * says the CPU was in exception mode or in user mode.
+ */
+void show_trace(struct task_struct *tsk, unsigned long *sp,
+		struct pt_regs *regs)
+{
+	if (regs) {
+		switch (regs->sr & MODE_MASK) {
+		case MODE_EXCEPTION:
+		case MODE_USER:
+			return;
+		}
+	}
+
+	printk ("Call trace:");
+#ifdef CONFIG_KALLSYMS
+	printk("\n");
+#endif
+
+	__show_trace(tsk, sp, regs);
+	printk("\n");
+}
+
+/*
+ * Dump the kernel stack of @tsk from @sp up to the top of its thread
+ * area, then print a call trace.
+ *
+ * A NULL @tsk means the current task. A NULL @sp means the live stack
+ * pointer for the current task, or the saved kernel stack pointer for
+ * any other task.
+ */
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	unsigned long stack_top;
+
+	if (!tsk)
+		tsk = current;
+
+	if (!sp) {
+		if (tsk != current) {
+			sp = (unsigned long *)tsk->thread.cpu_context.ksp;
+		} else {
+			register unsigned long *real_sp __asm__("sp");
+			sp = real_sp;
+		}
+	}
+
+	stack_top = THREAD_SIZE + (unsigned long)tsk->thread_info;
+	dump_mem("Stack: ", (unsigned long)sp, stack_top);
+	show_trace(tsk, sp, NULL);
+}
+
+/* Dump the stack and call trace of the current task at the call site. */
+void dump_stack(void)
+{
+ /* NULL task and NULL sp make show_stack() use current and the live sp */
+ show_stack(NULL, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
+
+/* Atomic notifier chain for die events; managed by the two helpers below. */
+ATOMIC_NOTIFIER_HEAD(avr32_die_chain);
+
+/*
+ * Add @nb to avr32_die_chain.
+ * Returns the result of atomic_notifier_chain_register().
+ */
+int register_die_notifier(struct notifier_block *nb)
+{
+ pr_debug("register_die_notifier: %p\n", nb);
+
+ return atomic_notifier_chain_register(&avr32_die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+
+/*
+ * Remove @nb from avr32_die_chain.
+ * Returns the result of atomic_notifier_chain_unregister().
+ */
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&avr32_die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
+/* Serializes the oops report printed by __die() */
+static DEFINE_SPINLOCK(die_lock);
+
+/*
+ * Print an oops report and terminate the current task with SIGSEGV.
+ *
+ * @str:  headline of the report
+ * @regs: register state at the time of the fault
+ * @err:  error code supplied by the caller (not used in the report)
+ * @file, @func, @line: optional source location; printed only when
+ *        both @file and @func are non-NULL
+ *
+ * The stack is dumped only if the fault happened in kernel mode or in
+ * interrupt context. This function does not return: it ends in
+ * do_exit().
+ */
+void __die(const char *str, struct pt_regs *regs, unsigned long err,
+	   const char *file, const char *func, unsigned long line)
+{
+	struct task_struct *tsk = current;
+	static int die_counter;
+
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	bust_spinlocks(1);
+
+	printk(KERN_ALERT "%s", str);
+	if (file && func)
+		/* line is unsigned long, so it must be printed with %lu */
+		printk(" in %s:%s, line %lu", file, func, line);
+	printk("[#%d]:\n", ++die_counter);
+	print_modules();
+	show_regs(regs);
+	printk("Process %s (pid: %d, stack limit = 0x%p)\n",
+	       tsk->comm, tsk->pid, tsk->thread_info + 1);
+
+	if (!user_mode(regs) || in_interrupt()) {
+		dump_mem("Stack: ", regs->sp,
+			 THREAD_SIZE + (unsigned long)tsk->thread_info);
+	}
+
+	bust_spinlocks(0);
+	spin_unlock_irq(&die_lock);
+	do_exit(SIGSEGV);
+}
+
+/*
+ * Call __die() with the same arguments, but only when @regs describes
+ * a fault taken outside user mode. Returns normally for user-mode
+ * faults.
+ */
+void __die_if_kernel(const char *str, struct pt_regs *regs, unsigned long err,
+		     const char *file, const char *func, unsigned long line)
+{
+	if (user_mode(regs))
+		return;
+
+	__die(str, regs, err, file, func, line);
+}
+
+/*
+ * NMI handler: report the NMI and die. @ecr is passed on to die() as
+ * the error code; BUG() catches the case where die() returns.
+ */
+asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs)
+{
+#ifdef CONFIG_SUBARCH_AVR32B
+ /*
+ * The exception entry always saves RSR_EX. For NMI, this is
+ * wrong; it should be RSR_NMI
+ */
+ regs->sr = sysreg_read(RSR_NMI);
+#endif
+
+ printk("NMI taken!!!!\n");
+ die("NMI", regs, ecr);
+ BUG();
+}
+
+/*
+ * Handler for critical exceptions: print the exception cause @ecr and
+ * the faulting pc, then die. BUG() catches the case where die() returns.
+ */
+asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs)
+{
+ printk("Unable to handle critical exception %lu at pc = %08lx!\n",
+ ecr, regs->pc);
+ die("Oops", regs, ecr);
+ BUG();
+}
+
+/*
+ * Handler for address (alignment) exceptions.
+ *
+ * A fault in kernel mode is fatal. For user mode the current task is
+ * sent SIGBUS with code BUS_ADRALN and si_addr set to the faulting pc.
+ * With DEBUG defined, the kind of access is logged first.
+ */
+asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	die_if_kernel("Oops: Address exception in kernel mode", regs, ecr);
+
+#ifdef DEBUG
+	switch (ecr) {
+	case ECR_ADDR_ALIGN_X:
+		pr_debug("Instruction Address Exception at pc = %08lx\n",
+			 regs->pc);
+		break;
+	case ECR_ADDR_ALIGN_R:
+		pr_debug("Data Address Exception (Read) at pc = %08lx\n",
+			 regs->pc);
+		break;
+	case ECR_ADDR_ALIGN_W:
+		pr_debug("Data Address Exception (Write) at pc = %08lx\n",
+			 regs->pc);
+		break;
+	default:
+		BUG();
+	}
+
+	show_regs(regs);
+#endif
+
+	info.si_addr = (void __user *)regs->pc;
+	info.si_code = BUS_ADRALN;
+	info.si_errno = 0;
+	info.si_signo = SIGBUS;
+
+	force_sig_info(SIGBUS, &info, current);
+}
+
+/* This way of handling undefined instructions is stolen from ARM */
+static LIST_HEAD(undef_hook);
+/*
+ * Protects undef_hook. Defined with DEFINE_SPINLOCK() like die_lock
+ * above rather than the deprecated SPIN_LOCK_UNLOCKED initializer.
+ */
+static DEFINE_SPINLOCK(undef_lock);
+
+/*
+ * Add @hook to the list of undefined-instruction handlers that
+ * do_illegal_opcode() consults.
+ *
+ * The lock is taken with irqsave/irqrestore so that the caller's
+ * interrupt state is left untouched; spin_unlock_irq() would enable
+ * interrupts unconditionally, even for a caller that had them disabled.
+ */
+void register_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&undef_lock, flags);
+	list_add(&hook->node, &undef_hook);
+	spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+/*
+ * Remove @hook from the list of undefined-instruction handlers.
+ *
+ * The lock is taken with irqsave/irqrestore so that the caller's
+ * interrupt state is left untouched; spin_unlock_irq() would enable
+ * interrupts unconditionally, even for a caller that had them disabled.
+ */
+void unregister_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&undef_lock, flags);
+	list_del(&hook->node);
+	spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+/*
+ * Try to enable the coprocessor addressed by instruction @insn.
+ *
+ * The coprocessor number is taken from bits 15:13 of the opcode, except
+ * for the LDC0 encoding, which always means coprocessor 0. The enable
+ * bit for coprocessor n is bit (24 + n) of CPUCR; it is set and then
+ * read back to see whether it stuck.
+ *
+ * Returns 0 if the coprocessor is now enabled, -1 if it is not present.
+ */
+static int do_cop_absent(u32 insn)
+{
+	int cop_nr;
+	u32 cpucr, cop_mask;
+
+	if ((insn & 0xfdf00000) == 0xf1900000)
+		/* LDC0 */
+		cop_nr = 0;
+	else
+		cop_nr = (insn >> 13) & 0x7;
+
+	/*
+	 * Build the mask as unsigned: for coprocessor 7 this is bit 31,
+	 * and shifting a signed 1 into the sign bit is undefined.
+	 */
+	cop_mask = (u32)1 << (24 + cop_nr);
+
+	/* Try enabling the coprocessor */
+	cpucr = sysreg_read(CPUCR);
+	cpucr |= cop_mask;
+	sysreg_write(CPUCR, cpucr);
+
+	/* The bit only reads back as set if the coprocessor exists */
+	cpucr = sysreg_read(CPUCR);
+	if (!(cpucr & cop_mask)) {
+		printk("Coprocessor #%i not found!\n", cop_nr);
+		return -1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_BUG
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+/*
+ * Print the "kernel BUG at file:line" banner for the BUG trap at
+ * regs->pc. The line number is read as a 16-bit value at pc + 2 and
+ * the file name pointer as a word at pc + 4. @insn is not used.
+ */
+static inline void do_bug_verbose(struct pt_regs *regs, u32 insn)
+{
+ char *file;
+ u16 line;
+ char c;
+
+ /* Without a readable line number there is nothing useful to print */
+ if (__get_user(line, (u16 __user *)(regs->pc + 2)))
+ return;
+ /*
+ * Use a placeholder if the pointer cannot be read, lies below
+ * PAGE_OFFSET, or its first character cannot be read. The order
+ * of the tests matters: file is only inspected once it was loaded.
+ */
+ if (__get_user(file, (char * __user *)(regs->pc + 4))
+ || (unsigned long)file < PAGE_OFFSET
+ || __get_user(c, file))
+ file = "<bad filename>";
+
+ printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
+}
+#else
+/* Non-verbose build: no location information is printed. */
+static inline void do_bug_verbose(struct pt_regs *regs, u32 insn)
+{
+
+}
+#endif
+#endif
+
+/*
+ * Handler for illegal opcode, unimplemented instruction, privilege
+ * violation and coprocessor-absent exceptions; @ecr says which one.
+ *
+ * User mode: first try to enable a missing coprocessor, then offer the
+ * instruction to the registered undef hooks; if nobody handles it the
+ * task gets SIGILL with a code derived from @ecr.
+ *
+ * Kernel mode: a BUG opcode in kernel text is reported as "Kernel BUG",
+ * anything else as an illegal-instruction oops.
+ */
+asmlinkage void do_illegal_opcode(unsigned long ecr, struct pt_regs *regs)
+{
+ u32 insn;
+ struct undef_hook *hook;
+ siginfo_t info;
+ void __user *pc;
+
+ if (!user_mode(regs))
+ goto kernel_trap;
+
+ local_irq_enable();
+
+ /* Fetch the offending instruction; an unreadable pc goes straight to SIGILL */
+ pc = (void __user *)instruction_pointer(regs);
+ if (__get_user(insn, (u32 __user *)pc))
+ goto invalid_area;
+
+ if (ecr == ECR_COPROC_ABSENT) {
+ /* Coprocessor could be enabled: nothing more to do here */
+ if (do_cop_absent(insn) == 0)
+ return;
+ }
+
+ /* The first matching hook that returns 0 has handled the instruction */
+ spin_lock_irq(&undef_lock);
+ list_for_each_entry(hook, &undef_hook, node) {
+ if ((insn & hook->insn_mask) == hook->insn_val) {
+ if (hook->fn(regs, insn) == 0) {
+ spin_unlock_irq(&undef_lock);
+ return;
+ }
+ }
+ }
+ spin_unlock_irq(&undef_lock);
+
+invalid_area:
+
+#ifdef DEBUG
+ printk("Illegal instruction at pc = %08lx\n", regs->pc);
+ if (regs->pc < TASK_SIZE) {
+ unsigned long ptbr, pgd, pte, *p;
+
+ /* Look up the page table entries for pc by hand, starting at PTBR */
+ ptbr = sysreg_read(PTBR);
+ p = (unsigned long *)ptbr;
+ pgd = p[regs->pc >> 22];
+ p = (unsigned long *)((pgd & 0x1ffff000) | 0x80000000);
+ pte = p[(regs->pc >> 12) & 0x3ff];
+ printk("page table: 0x%08lx -> 0x%08lx -> 0x%08lx\n", ptbr, pgd, pte);
+ }
+#endif
+
+ /* Nobody handled it: send SIGILL with a code matching the cause */
+ info.si_signo = SIGILL;
+ info.si_errno = 0;
+ info.si_addr = (void __user *)regs->pc;
+ switch (ecr) {
+ case ECR_ILLEGAL_OPCODE:
+ case ECR_UNIMPL_INSTRUCTION:
+ info.si_code = ILL_ILLOPC;
+ break;
+ case ECR_PRIVILEGE_VIOLATION:
+ info.si_code = ILL_PRVOPC;
+ break;
+ case ECR_COPROC_ABSENT:
+ info.si_code = ILL_COPROC;
+ break;
+ default:
+ BUG();
+ }
+
+ force_sig_info(SIGILL, &info, current);
+ return;
+
+kernel_trap:
+#ifdef CONFIG_BUG
+ /* Only read the opcode if pc points into kernel text */
+ if (__kernel_text_address(instruction_pointer(regs))) {
+ insn = *(u16 *)instruction_pointer(regs);
+ if (insn == AVR32_BUG_OPCODE) {
+ do_bug_verbose(regs, insn);
+ die("Kernel BUG", regs, 0);
+ return;
+ }
+ }
+#endif
+
+ die("Oops: Illegal instruction in kernel code", regs, ecr);
+}
+
+/*
+ * Handler for floating-point exceptions. There is no FPU, so the event
+ * is logged and the current task is sent SIGILL with code ILL_COPROC
+ * and si_addr set to the faulting pc. @ecr is not used.
+ */
+asmlinkage void do_fpe(unsigned long ecr, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	printk("Floating-point exception at pc = %08lx\n", regs->pc);
+
+	/* We have no FPU... */
+	info.si_code = ILL_COPROC;
+	info.si_addr = (void __user *)regs->pc;
+	info.si_errno = 0;
+	info.si_signo = SIGILL;
+
+	force_sig_info(SIGILL, &info, current);
+}
+
+
+/* Boot-time trap initialization hook; intentionally empty here. */
+void __init trap_init(void)
+{
+
+}
diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c
new file mode 100644
index 000000000000..cdd627c6b7dc
--- /dev/null
+++ b/arch/avr32/kernel/vmlinux.lds.c
@@ -0,0 +1,139 @@
+/*
+ * AVR32 linker script for the Linux kernel
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define LOAD_OFFSET 0x00000000
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf32-avr32", "elf32-avr32", "elf32-avr32")
+OUTPUT_ARCH(avr32)
+ENTRY(_start)
+
+/* Big endian */
+jiffies = jiffies_64 + 4;
+
+/*
+ * Image layout, in order: .init (starting at CONFIG_ENTRY_ADDRESS),
+ * .text, __ex_table, read-only data, .data, .bss.
+ */
+SECTIONS
+{
+ . = CONFIG_ENTRY_ADDRESS;
+ /* Init code and data, bracketed by __init_begin and __init_end */
+ .init : AT(ADDR(.init) - LOAD_OFFSET) {
+ _stext = .;
+ __init_begin = .;
+ _sinittext = .;
+ *(.text.reset)
+ *(.init.text)
+ _einittext = .;
+ . = ALIGN(4);
+ __tagtable_begin = .;
+ *(.taglist)
+ __tagtable_end = .;
+ *(.init.data)
+ . = ALIGN(16);
+ __setup_start = .;
+ *(.init.setup)
+ __setup_end = .;
+ . = ALIGN(4);
+ /* Initcall sections are laid out in level order, 1 to 7 */
+ __initcall_start = .;
+ *(.initcall1.init)
+ *(.initcall2.init)
+ *(.initcall3.init)
+ *(.initcall4.init)
+ *(.initcall5.init)
+ *(.initcall6.init)
+ *(.initcall7.init)
+ __initcall_end = .;
+ __con_initcall_start = .;
+ *(.con_initcall.init)
+ __con_initcall_end = .;
+ __security_initcall_start = .;
+ *(.security_initcall.init)
+ __security_initcall_end = .;
+ . = ALIGN(32);
+ __initramfs_start = .;
+ *(.init.ramfs)
+ __initramfs_end = .;
+ . = ALIGN(4096);
+ __init_end = .;
+ }
+
+ . = ALIGN(8192);
+ /*
+ * Kernel text. The section starts at _evba with the exception
+ * entry code; the TLB and scall entry sections are placed at the
+ * fixed offsets 0x50, 0x60, 0x70 and 0x100 from the section start.
+ * Gaps are filled with the pattern 0xd703d703.
+ */
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
+ _evba = .;
+ _text = .;
+ *(.ex.text)
+ . = 0x50;
+ *(.tlbx.ex.text)
+ . = 0x60;
+ *(.tlbr.ex.text)
+ . = 0x70;
+ *(.tlbw.ex.text)
+ . = 0x100;
+ *(.scall.text)
+ *(.irq.text)
+ *(.text)
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ *(.fixup)
+ *(.gnu.warning)
+ _etext = .;
+ } = 0xd703d703
+
+ . = ALIGN(4);
+ /* Exception table, bracketed by __start___ex_table/__stop___ex_table */
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+ __start___ex_table = .;
+ *(__ex_table)
+ __stop___ex_table = .;
+ }
+
+ RODATA
+
+ . = ALIGN(8192);
+
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ _data = .;
+ _sdata = .;
+ /*
+ * First, the init task union, aligned to an 8K boundary.
+ */
+ *(.data.init_task)
+
+ /* Then, the cacheline aligned data */
+ . = ALIGN(32);
+ *(.data.cacheline_aligned)
+
+ /* And the rest... */
+ *(.data.rel*)
+ *(.data)
+ CONSTRUCTORS
+
+ _edata = .;
+ }
+
+
+ . = ALIGN(8);
+ /* Zero-initialized data; _end marks the end of the image */
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+ __bss_start = .;
+ *(.bss)
+ *(COMMON)
+ . = ALIGN(8);
+ __bss_stop = .;
+ _end = .;
+ }
+
+ /* When something in the kernel is NOT compiled as a module, the module
+ * cleanup code and data are put into these segments. Both can then be
+ * thrown away, as cleanup code is never called unless it's a module.
+ */
+ /DISCARD/ : {
+ *(.exit.text)
+ *(.exit.data)
+ *(.exitcall.exit)
+ }
+
+ DWARF_DEBUG
+}
diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile
new file mode 100644
index 000000000000..09ac43e40522
--- /dev/null
+++ b/arch/avr32/lib/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for AVR32-specific library files
+#
+
+lib-y := copy_user.o clear_user.o
+lib-y += strncpy_from_user.o strnlen_user.o
+lib-y += delay.o memset.o memcpy.o findbit.o
+lib-y += csum_partial.o csum_partial_copy_generic.o
+lib-y += io-readsw.o io-readsl.o io-writesw.o io-writesl.o
+lib-y += __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o
diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S
new file mode 100644
index 000000000000..368b6bca4c76
--- /dev/null
+++ b/arch/avr32/lib/__avr32_asr64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * DWtype __avr32_asr64(DWtype u, word_type b)
+ *
+ * 64-bit arithmetic shift right. As used below, r11 holds the high
+ * word of u, r10 the low word and r12 the shift count b; the result
+ * is left in r11:r10.
+ */
+ .text
+ .global __avr32_asr64
+ .type __avr32_asr64,@function
+__avr32_asr64:
+ /* A zero shift count leaves u unchanged */
+ cp.w r12, 0
+ reteq r12
+
+ /* r9 = 32 - b; counts of 32 or more are handled at 1: */
+ rsub r9, r12, 32
+ brle 1f
+
+ /* b < 32: low = (low >> b) | (high << (32 - b)), high >>= b */
+ lsl r8, r11, r9
+ lsr r10, r10, r12
+ asr r11, r11, r12
+ or r10, r8
+ retal r12
+
+ /* b >= 32: low = high >> (b - 32), high is filled with the sign */
+1: neg r9
+ asr r10, r11, r9
+ asr r11, 31
+ retal r12
diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S
new file mode 100644
index 000000000000..f1dbc2b36257
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsl64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * DWtype __avr32_lsl64(DWtype u, word_type b)
+ *
+ * 64-bit logical shift left. As used below, r11 holds the high word
+ * of u, r10 the low word and r12 the shift count b; the result is
+ * left in r11:r10.
+ */
+ .text
+ .global __avr32_lsl64
+ .type __avr32_lsl64,@function
+__avr32_lsl64:
+ /* A zero shift count leaves u unchanged */
+ cp.w r12, 0
+ reteq r12
+
+ /* r9 = 32 - b; counts of 32 or more are handled at 1: */
+ rsub r9, r12, 32
+ brle 1f
+
+ /* b < 32: high = (high << b) | (low >> (32 - b)), low <<= b */
+ lsr r8, r10, r9
+ lsl r10, r10, r12
+ lsl r11, r11, r12
+ or r11, r8
+ retal r12
+
+ /* b >= 32: high = low << (b - 32), low = 0 */
+1: neg r9
+ lsl r11, r10, r9
+ mov r10, 0
+ retal r12
diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S
new file mode 100644
index 000000000000..e65bb7f0d24c
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsr64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * DWtype __avr32_lsr64(DWtype u, word_type b)
+ *
+ * 64-bit logical shift right. As used below, r11 holds the high word
+ * of u, r10 the low word and r12 the shift count b; the result is
+ * left in r11:r10.
+ */
+ .text
+ .global __avr32_lsr64
+ .type __avr32_lsr64,@function
+__avr32_lsr64:
+ /* A zero shift count leaves u unchanged */
+ cp.w r12, 0
+ reteq r12
+
+ /* r9 = 32 - b; counts of 32 or more are handled at 1: */
+ rsub r9, r12, 32
+ brle 1f
+
+ /* b < 32: low = (low >> b) | (high << (32 - b)), high >>= b */
+ lsl r8, r11, r9
+ lsr r11, r11, r12
+ lsr r10, r10, r12
+ or r10, r8
+ retal r12
+
+ /* b >= 32: low = high >> (b - 32), high = 0 */
+1: neg r9
+ lsr r10, r11, r9
+ mov r11, 0
+ retal r12
diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S
new file mode 100644
index 000000000000..d8991b6f8eb7
--- /dev/null
+++ b/arch/avr32/lib/clear_user.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+ /*
+ * clear_user / __clear_user: store zeroes to a user buffer.
+ *
+ * As used below, r12 is the destination pointer and r11 the byte
+ * count. Returns 0 on success; if a store faults, the fixup code
+ * returns the remaining count from r11.
+ *
+ * clear_user runs the two checking macros from <asm/asm.h> and then
+ * falls through into __clear_user.
+ */
+ .text
+ .align 1
+ .global clear_user
+ .type clear_user, "function"
+clear_user:
+ branch_if_kernel r8, __clear_user
+ ret_if_privileged r8, r12, r11, r11
+
+ .global __clear_user
+ .type __clear_user, "function"
+__clear_user:
+ /* r8 = the zero to store, r9 = destination & 3 */
+ mov r9, r12
+ mov r8, 0
+ andl r9, 3, COH
+ brne 5f
+
+ /* Word loop; r11 is kept 4 below the real remaining count */
+1: sub r11, 4
+ brlt 2f
+
+10: st.w r12++, r8
+ sub r11, 4
+ brge 10b
+
+ /* Undo the bias; done if nothing is left */
+2: sub r11, -4
+ reteq 0
+
+ /* Unaligned count or address */
+ /* Bit 1 of the remainder selects a halfword store, then a byte if needed */
+ bld r11, 1
+ brcc 12f
+11: st.h r12++, r8
+ sub r11, 2
+ reteq 0
+12: st.b r12++, r8
+ retal 0
+
+ /* Unaligned address */
+ /*
+ * NOTE(review): for counts below 4 this branches to 2b with r11
+ * still holding the unbiased count, and 2: then adds 4 to it.
+ * Confirm that counts of 0 and 2 with an unaligned address are
+ * handled as intended.
+ */
+5: cp.w r11, 4
+ brlt 2b
+
+ /* Computed jump into the byte stores below to word-align r12 */
+ lsl r9, 2
+ add pc, pc, r9
+13: st.b r12++, r8
+ sub r11, 1
+14: st.b r12++, r8
+ sub r11, 1
+15: st.b r12++, r8
+ sub r11, 1
+ rjmp 1b
+
+ .size clear_user, . - clear_user
+ .size __clear_user, . - __clear_user
+
+ /* Fault fixups: 18 first undoes the word loop's bias of 4 */
+ .section .fixup, "ax"
+ .align 1
+18: sub r11, -4
+19: retal r11
+
+ /* Each entry pairs a store that may fault with its fixup label */
+ .section __ex_table, "a"
+ .align 2
+ .long 10b, 18b
+ .long 11b, 19b
+ .long 12b, 19b
+ .long 13b, 19b
+ .long 14b, 19b
+ .long 15b, 19b
diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S
new file mode 100644
index 000000000000..ea59c04b07de
--- /dev/null
+++ b/arch/avr32/lib/copy_user.S
@@ -0,0 +1,119 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+ /*
+ * __kernel_size_t
+ * __copy_user(void *to, const void *from, __kernel_size_t n)
+ *
+ * Returns the number of bytes not copied. Might be off by
+ * max 3 bytes if we get a fault in the main loop.
+ *
+ * The address-space checking functions simply fall through to
+ * the non-checking version.
+ */
+ .text
+ .align 1
+ .global copy_from_user
+ .type copy_from_user, @function
+ /* Checking entry point: the check macro is applied to the source, r11 */
+copy_from_user:
+ branch_if_kernel r8, __copy_user
+ ret_if_privileged r8, r11, r10, r10
+ rjmp __copy_user
+ .size copy_from_user, . - copy_from_user
+
+ .global copy_to_user
+ .type copy_to_user, @function
+ /*
+ * Checking entry point: the check macro is applied to the destination,
+ * r12. Falls through into __copy_user.
+ */
+copy_to_user:
+ branch_if_kernel r8, __copy_user
+ ret_if_privileged r8, r12, r10, r10
+ .size copy_to_user, . - copy_to_user
+
+ .global __copy_user
+ .type __copy_user, @function
+ /* As used below: r12 = to, r11 = from, r10 = n */
+__copy_user:
+ /* r9 = from & 3; a misaligned source is handled at 6: */
+ mov r9, r11
+ andl r9, 3, COH
+ brne 6f
+
+ /* At this point, from is word-aligned */
+ /* Word loop; r10 is kept 4 below the real remaining count */
+1: sub r10, 4
+ brlt 3f
+
+2:
+10: ld.w r8, r11++
+11: st.w r12++, r8
+ sub r10, 4
+ brge 2b
+
+ /* Undo the bias; done if nothing is left */
+3: sub r10, -4
+ reteq 0
+
+ /*
+ * Handle unaligned count. Need to be careful with r10 here so
+ * that we return the correct value even if we get a fault
+ */
+4:
+20: ld.ub r8, r11++
+21: st.b r12++, r8
+ sub r10, 1
+ reteq 0
+22: ld.ub r8, r11++
+23: st.b r12++, r8
+ sub r10, 1
+ reteq 0
+24: ld.ub r8, r11++
+25: st.b r12++, r8
+ retal 0
+
+ /* Handle unaligned from-pointer */
+ /* Fewer than 4 bytes: use the byte tail above */
+6: cp.w r10, 4
+ brlt 4b
+ /* r9 = 4 - (from & 3): bytes to copy until from is word-aligned */
+ rsub r9, r9, 4
+
+30: ld.ub r8, r11++
+31: st.b r12++, r8
+ sub r10, 1
+ sub r9, 1
+ breq 1b
+32: ld.ub r8, r11++
+33: st.b r12++, r8
+ sub r10, 1
+ sub r9, 1
+ breq 1b
+34: ld.ub r8, r11++
+35: st.b r12++, r8
+ sub r10, 1
+ rjmp 1b
+ .size __copy_user, . - __copy_user
+
+ /*
+ * Fault fixups: return the remaining count from r10. Faults in the
+ * word loop enter at 19, which first undoes the loop's bias of 4.
+ */
+ .section .fixup,"ax"
+ .align 1
+19: sub r10, -4
+29: retal r10
+
+ /* Each entry pairs an access that may fault with its fixup label */
+ .section __ex_table,"a"
+ .align 2
+ .long 10b, 19b
+ .long 11b, 19b
+ .long 20b, 29b
+ .long 21b, 29b
+ .long 22b, 29b
+ .long 23b, 29b
+ .long 24b, 29b
+ .long 25b, 29b
+ .long 30b, 29b
+ .long 31b, 29b
+ .long 32b, 29b
+ .long 33b, 29b
+ .long 34b, 29b
+ .long 35b, 29b
diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S
new file mode 100644
index 000000000000..6a262b528eb7
--- /dev/null
+++ b/arch/avr32/lib/csum_partial.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * unsigned int csum_partial(const unsigned char *buff,
+ * int len, unsigned int sum)
+ */
+ .text
+ .global csum_partial
+ .type csum_partial,"function"
+ .align 1
+ /* As used below: r12 = buff, r11 = len, r10 = sum (also the result) */
+csum_partial:
+ /* checksum complete words, aligned or not */
+ /* r11 is kept 4 below the real remaining length inside the loop */
+3: sub r11, 4
+ brlt 5f
+4: ld.w r9, r12++
+ /* add the word, then fold the carry back in */
+ add r10, r9
+ acr r10
+ sub r11, 4
+ brge 4b
+
+ /* return if we had a whole number of words */
+5: sub r11, -4
+ reteq r10
+
+ /* checksum any remaining bytes at the end */
+ /*
+ * 1 byte:  byte << 8
+ * 2 bytes: the halfword, unshifted
+ * 3 bytes: (halfword << 16) | (byte << 8)
+ */
+ mov r9, 0
+ mov r8, 0
+ cp r11, 2
+ brlt 6f
+ ld.uh r9, r12++
+ sub r11, 2
+ breq 7f
+ lsl r9, 16
+6: ld.ub r8, r12++
+ lsl r8, 8
+7: or r9, r8
+ add r10, r9
+ acr r10
+
+ retal r10
+ .size csum_partial, . - csum_partial
diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S
new file mode 100644
index 000000000000..a3a0f9b8929c
--- /dev/null
+++ b/arch/avr32/lib/csum_partial_copy_generic.S
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/errno.h>
+#include <asm/asm.h>
+
+ /*
+ * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
+ * int sum, int *src_err_ptr,
+ * int *dst_err_ptr)
+ *
+ * Copy src to dst while checksumming, otherwise like csum_partial.
+ */
+
+ /*
+ * ld_src: emit "ld.<size> reg, ptr" and add an exception table
+ * entry that sends a fault on that load to fixup_ld_src.
+ */
+ .macro ld_src size, reg, ptr
+9999: ld.\size \reg, \ptr
+ .section __ex_table, "a"
+ .long 9999b, fixup_ld_src
+ .previous
+ .endm
+
+ /*
+ * st_dst: emit "st.<size> ptr, reg" and add an exception table
+ * entry that sends a fault on that store to fixup_st_dst.
+ */
+ .macro st_dst size, ptr, reg
+9999: st.\size \ptr, \reg
+ .section __ex_table, "a"
+ .long 9999b, fixup_st_dst
+ .previous
+ .endm
+
+ .text
+ .global csum_partial_copy_generic
+ .type csum_partial_copy_generic,"function"
+ .align 1
+ /*
+ * As used below: r12 = src, r11 = dst, r10 = len, r9 = sum,
+ * r8 = src_err_ptr; dst_err_ptr is read from the stack in
+ * fixup_st_dst. The checksum is returned in r12.
+ */
+csum_partial_copy_generic:
+ pushm r4-r7,lr
+
+ /* The inner loop */
+ /* r10 is kept 4 below the real remaining length inside the loop */
+1: sub r10, 4
+ brlt 5f
+2: ld_src w, r5, r12++
+ st_dst w, r11++, r5
+ add r9, r5
+ acr r9
+ sub r10, 4
+ brge 2b
+
+ /* return if we had a whole number of words */
+5: sub r10, -4
+ brne 7f
+
+ /* Common exit: return the sum and restore the saved registers */
+6: mov r12, r9
+ popm r4-r7,pc
+
+ /* handle additional bytes at the tail */
+ /*
+ * The tail bytes are collected in r5, most significant first,
+ * while r4 counts down from 32 by 8 per byte; the final shift by
+ * r4 left-justifies them in the word before it is added.
+ */
+7: mov r5, 0
+ mov r4, 32
+8: ld_src ub, r6, r12++
+ st_dst b, r11++, r6
+ lsl r5, 8
+ sub r4, 8
+ bfins r5, r6, 0, 8
+ sub r10, 1
+ brne 8b
+
+ lsl r5, r5, r4
+ add r9, r5
+ acr r9
+ rjmp 6b
+
+ /* Exception handler */
+ .section .fixup,"ax"
+ .align 1
+ /* Load fault: store -EFAULT through src_err_ptr (r8) if non-NULL */
+fixup_ld_src:
+ mov r9, -EFAULT
+ cp.w r8, 0
+ breq 1f
+ st.w r8[0], r9
+
+1: /*
+ * TODO: zero the complete destination - computing the rest
+ * is too much work
+ */
+
+ /* Return a checksum of 0 */
+ mov r9, 0
+ rjmp 6b
+
+ /*
+ * Store fault: dst_err_ptr is loaded from sp[20], just above the
+ * five registers pushed on entry; -EFAULT is stored through it if
+ * it is non-NULL, and a checksum of 0 is returned.
+ */
+fixup_st_dst:
+ mov r9, -EFAULT
+ lddsp r8, sp[20]
+ cp.w r8, 0
+ breq 1f
+ st.w r8[0], r9
+1: mov r9, 0
+ rjmp 6b
+
+ .previous
diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c
new file mode 100644
index 000000000000..462c8307b680
--- /dev/null
+++ b/arch/avr32/lib/delay.c
@@ -0,0 +1,55 @@
+/*
+ * Precise Delay Loops for avr32
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/delay.h>
+#include <asm/processor.h>
+#include <asm/sysreg.h>
+
+/*
+ * Store the current value of the COUNT system register in
+ * *timer_value. Always returns 0.
+ */
+int read_current_timer(unsigned long *timer_value)
+{
+ *timer_value = sysreg_read(COUNT);
+ return 0;
+}
+
+/*
+ * Busy-wait until the COUNT system register has advanced by at least
+ * @loops since entry. The elapsed count is computed with unsigned
+ * arithmetic, so a wrap of the counter during the wait is harmless.
+ */
+void __delay(unsigned long loops)
+{
+	unsigned start, elapsed;
+
+	start = sysreg_read(COUNT);
+	do {
+		elapsed = sysreg_read(COUNT) - start;
+	} while (elapsed < loops);
+}
+
+/*
+ * Delay for @xloops, a duration in seconds scaled by 2**32 (see the
+ * callers below). The 64-bit product of @xloops and the number of
+ * delay loops per second (loops_per_jiffy * HZ) is formed with
+ * mulu.d; its upper 32 bits are the loop count passed to __delay().
+ */
+inline void __const_udelay(unsigned long xloops)
+{
+ unsigned long long loops;
+
+ asm("mulu.d %0, %1, %2"
+ : "=r"(loops)
+ : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops));
+ __delay(loops >> 32);
+}
+
+/* Delay for @usecs microseconds by scaling to the 2**32 fixed-point unit. */
+void __udelay(unsigned long usecs)
+{
+ __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+
+/* Delay for @nsecs nanoseconds by scaling to the 2**32 fixed-point unit. */
+void __ndelay(unsigned long nsecs)
+{
+ __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S
new file mode 100644
index 000000000000..2b4856f4bf7c
--- /dev/null
+++ b/arch/avr32/lib/findbit.S
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+
+ .text
+ /*
+ * unsigned long find_first_zero_bit(const unsigned long *addr,
+ * unsigned long size)
+ *
+ * As used below: r12 = addr, r11 = size, r9 = bits still to scan.
+ * Returns size when no zero bit is found.
+ */
+ENTRY(find_first_zero_bit)
+ cp.w r11, 0
+ reteq r11
+ mov r9, r11
+ /* Complement each word so that zero bits show up as set bits */
+1: ld.w r8, r12[0]
+ com r8
+ brne .L_found
+ sub r12, -4
+ sub r9, 32
+ brgt 1b
+ retal r11
+
+ /*
+ * unsigned long find_next_zero_bit(const unsigned long *addr,
+ * unsigned long size,
+ * unsigned long offset)
+ *
+ * As used below: r12 = addr, r11 = size, r10 = offset,
+ * r9 = bits still to scan. Returns size when no zero bit is found.
+ */
+ENTRY(find_next_zero_bit)
+ /* r8 = word index of offset, r9 = size - offset */
+ lsr r8, r10, 5
+ sub r9, r11, r10
+ retle r11
+
+ /* Advance addr to the word containing offset; r10 = offset & 31 */
+ lsl r8, 2
+ add r12, r8
+ andl r10, 31, COH
+ breq 1f
+
+ /* offset is not word-aligned. Handle the first (32 - r10) bits */
+ ld.w r8, r12[0]
+ com r8
+ sub r12, -4
+ lsr r8, r8, r10
+ brne .L_found
+
+ /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+ add r9, r10
+ sub r9, 32
+ retle r11
+
+ /* Main loop. offset must be word-aligned */
+1: ld.w r8, r12[0]
+ com r8
+ brne .L_found
+ sub r12, -4
+ sub r9, 32
+ brgt 1b
+ retal r11
+
+ /* Common return path for when a bit is actually found. */
+ /*
+ * On entry r8 is the (possibly shifted) word with at least one bit
+ * set, r9 the number of bits still to scan and r11 the size.
+ */
+.L_found:
+ /* Bit-reverse, then count leading zeros: index of the lowest set bit */
+ brev r8
+ clz r10, r8
+ /* r9 = size - r9, the bit position the index above is relative to */
+ rsub r9, r11
+ add r10, r9
+
+ /* XXX: If we don't have to return exactly "size" when the bit
+ is not found, we may drop this "min" thing */
+ min r12, r11, r10
+ retal r12
+
+ /*
+ * unsigned long find_first_bit(const unsigned long *addr,
+ * unsigned long size)
+ *
+ * As used below: r12 = addr, r11 = size, r9 = bits still to scan.
+ * Returns size when no set bit is found.
+ */
+ENTRY(find_first_bit)
+ cp.w r11, 0
+ reteq r11
+ mov r9, r11
+ /* Any non-zero word contains the bit we are looking for */
+1: ld.w r8, r12[0]
+ cp.w r8, 0
+ brne .L_found
+ sub r12, -4
+ sub r9, 32
+ brgt 1b
+ retal r11
+
+ /*
+ * unsigned long find_next_bit(const unsigned long *addr,
+ * unsigned long size,
+ * unsigned long offset)
+ *
+ * As used below: r12 = addr, r11 = size, r10 = offset,
+ * r9 = bits still to scan. Returns size when no set bit is found.
+ */
+ENTRY(find_next_bit)
+ /* r8 = word index of offset, r9 = size - offset */
+ lsr r8, r10, 5
+ sub r9, r11, r10
+ retle r11
+
+ /* Advance addr to the word containing offset; r10 = offset & 31 */
+ lsl r8, 2
+ add r12, r8
+ andl r10, 31, COH
+ breq 1f
+
+ /* offset is not word-aligned. Handle the first (32 - r10) bits */
+ ld.w r8, r12[0]
+ sub r12, -4
+ lsr r8, r8, r10
+ brne .L_found
+
+ /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+ add r9, r10
+ sub r9, 32
+ retle r11
+
+ /* Main loop. offset must be word-aligned */
+1: ld.w r8, r12[0]
+ cp.w r8, 0
+ brne .L_found
+ sub r12, -4
+ sub r9, 32
+ brgt 1b
+ retal r11
+
+	/*
+	 * unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
+	 *					       unsigned long size,
+	 *					       unsigned long offset)
+	 *
+	 * Like find_next_zero_bit, but each word is loaded byte-swapped
+	 * (ldswp.w) so the bitmap is interpreted as little-endian.
+	 * As used below: r12 = addr, r11 = size, r10 = offset,
+	 * r9 = bits still to scan. Returns size when no zero bit is found.
+	 *
+	 * Every loaded word is complemented so that zero bits show up as
+	 * set bits for .L_found, exactly as find_next_zero_bit does.
+	 */
+ENTRY(generic_find_next_zero_le_bit)
+	/* r8 = word index of offset, r9 = size - offset */
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	/* Advance addr to the word containing offset; r10 = offset & 31 */
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ldswp.w	r8, r12[0]
+	/* complement before shifting so the shifted-in bits stay zero */
+	com	r8
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ldswp.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S
new file mode 100644
index 000000000000..b103511ed6c4
--- /dev/null
+++ b/arch/avr32/lib/io-readsl.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * __raw_readsl: read r10 words from the fixed address in r12 and
+ * store them to consecutive locations starting at r11.
+ */
+ .global __raw_readsl
+ .type __raw_readsl,@function
+__raw_readsl:
+ /* Nothing to do for a zero count */
+ cp.w r10, 0
+ reteq r12
+
+ /*
+ * If r11 isn't properly aligned, we might get an exception on
+ * some implementations. But there's not much we can do about it.
+ */
+1: ld.w r8, r12[0]
+ sub r10, 1
+ st.w r11++, r8
+ brne 1b
+
+ retal r12
diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S
new file mode 100644
index 000000000000..456be9909027
--- /dev/null
+++ b/arch/avr32/lib/io-readsw.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * __raw_readsw: read r10 halfwords from the fixed address in r12
+ * and store them to consecutive locations starting at r11.
+ *
+ * .Lnot_word_aligned sits in front of the entry point: it transfers
+ * a single halfword and then falls through to re-run the checks.
+ */
+.Lnot_word_aligned:
+ /*
+ * Bad alignment will cause a hardware exception, which is as
+ * good as anything. No need for us to check for proper alignment.
+ */
+ ld.uh r8, r12[0]
+ sub r10, 1
+ st.h r11++, r8
+
+ /* fall through */
+
+ .global __raw_readsw
+ .type __raw_readsw,@function
+__raw_readsw:
+ cp.w r10, 0
+ reteq r12
+ /* Is the buffer pointer word-aligned? */
+ mov r9, 3
+ tst r11, r9
+ brne .Lnot_word_aligned
+
+ /* Pair loop; r10 is kept 2 below the real remaining count */
+ sub r10, 2
+ brlt 2f
+
+ /* Two reads fill the top and bottom halves of r8, stored as one word */
+1: ldins.h r8:t, r12[0]
+ ldins.h r8:b, r12[0]
+ st.w r11++, r8
+ sub r10, 2
+ brge 1b
+
+ /* Undo the bias; done unless one halfword is left */
+2: sub r10, -2
+ reteq r12
+
+ ld.uh r8, r12[0]
+ st.h r11++, r8
+ retal r12
diff --git a/arch/avr32/lib/io-writesl.S b/arch/avr32/lib/io-writesl.S
new file mode 100644
index 000000000000..22138b3a16e5
--- /dev/null
+++ b/arch/avr32/lib/io-writesl.S
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * __raw_writesl: write r10 words, read from consecutive locations
+ * starting at r11, to the fixed address in r12.
+ */
+ .global __raw_writesl
+ .type __raw_writesl,@function
+__raw_writesl:
+ /* Nothing to do for a zero count */
+ cp.w r10, 0
+ reteq r12
+
+1: ld.w r8, r11++
+ sub r10, 1
+ st.w r12[0], r8
+ brne 1b
+
+ retal r12
diff --git a/arch/avr32/lib/io-writesw.S b/arch/avr32/lib/io-writesw.S
new file mode 100644
index 000000000000..8c4a53f1c52a
--- /dev/null
+++ b/arch/avr32/lib/io-writesw.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * __raw_writesw: write r10 halfwords, read from consecutive
+ * locations starting at r11, to the fixed address in r12.
+ *
+ * .Lnot_word_aligned sits in front of the entry point: it transfers
+ * a single halfword and then falls through to re-run the checks.
+ */
+.Lnot_word_aligned:
+ ld.uh r8, r11++
+ sub r10, 1
+ st.h r12[0], r8
+
+ /* fall through */
+
+ .global __raw_writesw
+ .type __raw_writesw,@function
+__raw_writesw:
+ cp.w r10, 0
+ mov r9, 3
+ reteq r12
+ /* Is the buffer pointer word-aligned? */
+ tst r11, r9
+ brne .Lnot_word_aligned
+
+ /* Pair loop; r10 is kept 2 below the real remaining count */
+ sub r10, 2
+ brlt 2f
+
+ /* Load one word, then write its upper half followed by its lower half */
+1: ld.w r8, r11++
+ bfextu r9, r8, 16, 16
+ st.h r12[0], r9
+ st.h r12[0], r8
+ sub r10, 2
+ brge 1b
+
+ /* Undo the bias; done unless one halfword is left */
+2: sub r10, -2
+ reteq r12
+
+ ld.uh r8, r11++
+ st.h r12[0], r8
+ retal r12
diff --git a/arch/avr32/lib/libgcc.h b/arch/avr32/lib/libgcc.h
new file mode 100644
index 000000000000..5a091b5e3618
--- /dev/null
+++ b/arch/avr32/lib/libgcc.h
@@ -0,0 +1,33 @@
+/* Definitions for various functions 'borrowed' from gcc-3.4.3 */
+
+/* Number of bits in one addressable unit */
+#define BITS_PER_UNIT 8
+
+/* Integer and float types selected by machine mode rather than by C type */
+typedef int QItype __attribute__ ((mode (QI)));
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
+typedef int HItype __attribute__ ((mode (HI)));
+typedef unsigned int UHItype __attribute__ ((mode (HI)));
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+typedef float SFtype __attribute__ ((mode (SF)));
+typedef float DFtype __attribute__ ((mode (DF)));
+typedef int word_type __attribute__ ((mode (__word__)));
+
+/* A word (W) is 32 bits (SI mode); a double word (DW) is DI mode */
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+#define Wtype SItype
+#define UWtype USItype
+#define HWtype SItype
+#define UHWtype USItype
+#define DWtype DItype
+#define UDWtype UDItype
+/* Build libgcc-style names: __NW(a,b) gives __<a>si<b>, __NDW gives __<a>di<b> */
+#define __NW(a,b) __ ## a ## si ## b
+#define __NDW(a,b) __ ## a ## di ## b
+
+/* The two words of a double word; the high word comes first in memory */
+struct DWstruct {Wtype high, low;};
+
+/* Access a double word either as a whole (ll) or as its two halves (s) */
+typedef union
+{
+ struct DWstruct s;
+ DWtype ll;
+} DWunion;
diff --git a/arch/avr32/lib/longlong.h b/arch/avr32/lib/longlong.h
new file mode 100644
index 000000000000..cd5e369ac437
--- /dev/null
+++ b/arch/avr32/lib/longlong.h
@@ -0,0 +1,98 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+ Free Software Foundation, Inc.
+
+ This definition file is free software; you can redistribute it
+ and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2, or (at your option) any later version.
+
+ This definition file is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* Borrowed from gcc-3.4.3 */
+
+/* A quarter of the word size in bits. */
+#define __BITS4 (W_TYPE_SIZE / 4)
+/* 2^(W_TYPE_SIZE/2): the base used when a word is split into two halves. */
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+/* Lower and upper half of the word t, each as a UWtype value. */
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/*
+ * Store the number of leading zero bits of x in count.
+ * Uses __builtin_clz(), whose result is undefined for x == 0.
+ */
+#define count_leading_zeros(count, x) ((count) = __builtin_clz(x))
+
+/*
+ * __udiv_qrnnd_c(q, r, n1, n0, d): divide the two-word number (n1, n0)
+ * by the one-word divisor d, storing the quotient in q and the remainder
+ * in r. The division is done in two steps of half a word each: every
+ * step divides by the high half of d to get a quotient estimate and then
+ * corrects the estimate at most twice using the low half of d.
+ *
+ * NOTE(review): as in the GCC original this presumably requires n1 < d
+ * and a normalized d (most significant bit set) -- confirm at the call
+ * sites. The arguments are evaluated more than once.
+ */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+ do { \
+ UWtype __d1, __d0, __q1, __q0; \
+ UWtype __r1, __r0, __m; \
+ __d1 = __ll_highpart (d); \
+ __d0 = __ll_lowpart (d); \
+ \
+ __r1 = (n1) % __d1; \
+ __q1 = (n1) / __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart (n0); \
+ if (__r1 < __m) \
+ { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __r0 = __r1 % __d1; \
+ __q0 = __r1 / __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
+ if (__r0 < __m) \
+ { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+ } while (0)
+
+/* No faster variant is provided here: udiv_qrnnd is the generic C version. */
+#define udiv_qrnnd __udiv_qrnnd_c
+
+/*
+ * sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction
+ * (sh, sl) = (ah, al) - (bh, bl). The low words are subtracted first;
+ * a borrow is detected by the low result being greater than al and is
+ * then taken out of the high word. Arguments are evaluated more than
+ * once.
+ */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+ } while (0)
+
+/*
+ * umul_ppmm(w1, w0, u, v): multiply the words u and v and store the
+ * double-word product as high word w1 and low word w0. Both operands are
+ * split into half words, the four partial products are formed, and the
+ * two middle products are summed with explicit carry handling.
+ */
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ \
+ __ul = __ll_lowpart (u); \
+ __uh = __ll_highpart (u); \
+ __vl = __ll_lowpart (v); \
+ __vh = __ll_highpart (v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart (__x0);/* this can't give carry */ \
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos. */ \
+ \
+ (w1) = __x3 + __ll_highpart (__x1); \
+ (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
+ } while (0)
diff --git a/arch/avr32/lib/memcpy.S b/arch/avr32/lib/memcpy.S
new file mode 100644
index 000000000000..0abb26142b64
--- /dev/null
+++ b/arch/avr32/lib/memcpy.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /*
+ * void *memcpy(void *to, const void *from, unsigned long n)
+ *
+ * This implementation does word-aligned loads in the main loop,
+ * possibly sacrificing alignment of stores.
+ *
+ * Hopefully, in most cases, both "to" and "from" will be
+ * word-aligned to begin with.
+ */
+ /*
+ * Register usage as seen in the code below:
+ *   r12: destination pointer, advanced by every store
+ *   r11: source pointer, advanced by every load
+ *   r10: byte count, kept biased by -4 while in the word loop
+ *   r9:  first (from & 3), later the value that is returned
+ *   r8:  data scratch
+ */
+ .text
+ .global memcpy
+ .type memcpy, @function
+memcpy:
+ /* r9 = low two bits of "from"; non-zero means an unaligned source */
+ mov r9, r11
+ andl r9, 3, COH
+ brne 1f
+
+ /* At this point, "from" is word-aligned */
+ /* r9 takes the current r12 as return value; skip the loop if n < 4 */
+2: sub r10, 4
+ mov r9, r12
+ brlt 4f
+
+ /* Word loop: runs while at least four more bytes remain */
+3: ld.w r8, r11++
+ sub r10, 4
+ st.w r12++, r8
+ brge 3b
+
+ /* Here r10 = (bytes left) - 4; negate it to get 4 - (bytes left) */
+4: neg r10
+ reteq r9
+
+ /* Handle unaligned count */
+ /*
+ * Computed jump into the three byte copies below so that only the
+ * remaining 0..3 bytes are copied.
+ * NOTE(review): this relies on each ld.ub/st.b pair occupying 4 bytes
+ * and on the value that reading pc yields -- confirm against the AVR32
+ * architecture manual.
+ */
+ lsl r10, 2
+ add pc, pc, r10
+ ld.ub r8, r11++
+ st.b r12++, r8
+ ld.ub r8, r11++
+ st.b r12++, r8
+ ld.ub r8, r11++
+ st.b r12++, r8
+ retal r9
+
+ /* Handle unaligned "from" pointer */
+ /*
+ * With fewer than four bytes in total, use the byte tail at 4b.
+ * Otherwise copy the 1..3 bytes needed to word-align "from" through a
+ * second computed jump, adjust the count and rejoin the aligned path.
+ *
+ * NOTE(review): the value returned is r9. At "brlt 4b" r9 still holds
+ * (from & 3), and by the time "rjmp 2b" reaches "mov r9, r12" the
+ * destination pointer has already been advanced. It therefore looks as
+ * if the original "to" is not what gets returned whenever "from" is
+ * unaligned -- confirm and fix if so.
+ */
+1: sub r10, 4
+ brlt 4b
+ add r10, r9
+ lsl r9, 2
+ add pc, pc, r9
+ ld.ub r8, r11++
+ st.b r12++, r8
+ ld.ub r8, r11++
+ st.b r12++, r8
+ ld.ub r8, r11++
+ st.b r12++, r8
+ rjmp 2b
diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S
new file mode 100644
index 000000000000..40da32c0480c
--- /dev/null
+++ b/arch/avr32/lib/memset.S
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/arm/lib/memset.S
+ * Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ASM optimised string functions
+ */
+#include <asm/asm.h>
+
+ /*
+ * r12: void *b
+ * r11: int c
+ * r10: size_t len
+ *
+ * Returns b in r12
+ */
+ .text
+ .global memset
+ .type memset, @function
+ .align 5
+memset:
+ /*
+ * r8 is the working store pointer (r12 is kept intact as the return
+ * value) and r9 the low two bits of the pointer. The fill value is
+ * first replicated into the low 16 bits of r11.
+ */
+ mov r9, r12
+ mov r8, r12
+ or r11, r11, r11 << 8
+ andl r9, 3, COH
+ brne 1f
+
+ /* Word-aligned from here: replicate the fill value to all 32 bits */
+2: or r11, r11, r11 << 16
+ sub r10, 4
+ brlt 5f
+
+ /* Let's do some real work */
+4: st.w r8++, r11
+ sub r10, 4
+ brge 4b
+
+ /*
+ * When we get here, we've got less than 4 bytes to set. r10
+ * might be negative.
+ */
+ /* Undo the bias of 4; r10 is now the 0..3 bytes still to set */
+5: sub r10, -4
+ reteq r12
+
+ /* Fastpath ends here, exactly 32 bytes from memset */
+
+ /* Handle unaligned count or pointer */
+ /*
+ * Bit 1 of r10 set: store two bytes, then a third one if bit 0 is
+ * set as well. Bit 1 clear: exactly one byte remains.
+ */
+ bld r10, 1
+ brcc 6f
+ st.b r8++, r11
+ st.b r8++, r11
+ bld r10, 0
+ retcc r12
+6: st.b r8++, r11
+ retal r12
+
+ /* Handle unaligned pointer */
+ /*
+ * With fewer than four bytes in total, use the byte tail at 5b.
+ * Otherwise jump into the byte stores below to write the 1..3 bytes
+ * that bring r8 to a word boundary, then rejoin the aligned path.
+ * NOTE(review): the computed jump assumes 2-byte st.b encodings --
+ * confirm against the AVR32 architecture manual.
+ */
+1: sub r10, 4
+ brlt 5b
+ add r10, r9
+ lsl r9, 1
+ add pc, r9
+ st.b r8++, r11
+ st.b r8++, r11
+ st.b r8++, r11
+ rjmp 2b
+
+ .size memset, . - memset
diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S
new file mode 100644
index 000000000000..72bd50599ec6
--- /dev/null
+++ b/arch/avr32/lib/strncpy_from_user.S
@@ -0,0 +1,60 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+ /*
+ * long strncpy_from_user(char *dst, const char *src, long count)
+ *
+ * On success, returns the length of the string, not including
+ * the terminating NUL.
+ *
+ * If the string is longer than count, returns count
+ *
+ * If userspace access fails, returns -EFAULT
+ */
+ .text
+ .align 1
+ .global strncpy_from_user
+ .type strncpy_from_user, "function"
+strncpy_from_user:
+ /*
+ * Checked entry point. r9 is preloaded with -EFAULT for the macros
+ * below, which come from <asm/asm.h>.
+ * NOTE(review): presumably branch_if_kernel skips the check for kernel
+ * address space and ret_if_privileged returns r9 when the range
+ * r11..r11+r10 is not a user range -- confirm against the macro
+ * definitions.
+ */
+ mov r9, -EFAULT
+ branch_if_kernel r8, __strncpy_from_user
+ ret_if_privileged r8, r11, r10, r9
+
+ .global __strncpy_from_user
+ .type __strncpy_from_user, "function"
+__strncpy_from_user:
+ /* Unchecked variant: r12 = dst, r11 = src, r10 = count */
+ cp.w r10, 0
+ reteq 0
+
+ /* r9 counts down the bytes that may still be copied */
+ mov r9, r10
+
+ /* Copy byte by byte; stop after storing a NUL or after count bytes */
+1: ld.ub r8, r11++
+ st.b r12++, r8
+ cp.w r8, 0
+ breq 2f
+ sub r9, 1
+ brne 1b
+
+ /* Return count minus what is left: the bytes copied before the NUL */
+2: sub r10, r9
+ retal r10
+
+ /* Fault handler: a faulting load at label 1 makes the call return -EFAULT */
+ .section .fixup, "ax"
+ .align 1
+3: mov r12, -EFAULT
+ retal r12
+
+ /* Exception table entry: fault at 1b continues at 3b */
+ .section __ex_table, "a"
+ .align 2
+ .long 1b, 3b
diff --git a/arch/avr32/lib/strnlen_user.S b/arch/avr32/lib/strnlen_user.S
new file mode 100644
index 000000000000..65ce11afa66a
--- /dev/null
+++ b/arch/avr32/lib/strnlen_user.S
@@ -0,0 +1,67 @@
+/*
+ * Get the length of a userspace string with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm.h>
+
+ /*
+ * Register usage as seen in the code below:
+ *   r12: string pointer on entry, result on return
+ *   r11: maximum number of bytes to examine
+ *
+ * NOTE(review): presumably long strnlen_user(const char *str, long len)
+ * -- confirm against the C prototype.
+ */
+ .text
+ .align 1
+ .global strnlen_user
+ .type strnlen_user, "function"
+strnlen_user:
+ /*
+ * r8 = str + len - 1, the last address that may be read. Return 0 if
+ * that addition carried; if the result is negative, adjust_length
+ * takes a closer look.
+ * NOTE(review): branch_if_kernel comes from <asm/asm.h>; presumably it
+ * skips the range check for kernel address space -- confirm.
+ */
+ branch_if_kernel r8, __strnlen_user
+ sub r8, r11, 1
+ add r8, r12
+ retcs 0
+ brmi adjust_length /* do a closer inspection */
+
+ .global __strnlen_user
+ .type __strnlen_user, "function"
+__strnlen_user:
+ /* r10 remembers the start of the string */
+ mov r10, r12
+
+ /* Scan for a NUL, examining at most r11 bytes */
+10: ld.ub r8, r12++
+ cp.w r8, 0
+ breq 2f
+ sub r11, 1
+ brne 10b
+
+ /*
+ * No NUL within the limit: bump r12 once more so the result becomes
+ * limit + 1. When a NUL was found the result is its index + 1, i.e.
+ * the length including the terminator.
+ */
+ sub r12, -1
+2: sub r12, r10
+ retal r12
+
+
+ /*
+ * Reached when str + len - 1 is negative. Returns 0 for a negative str;
+ * otherwise limits the scan to TASK_SIZE - str bytes and returns 0 if
+ * the scan result exceeds that limit.
+ */
+ .type adjust_length, "function"
+adjust_length:
+ cp.w r12, 0 /* addr must always be < TASK_SIZE */
+ retmi 0
+
+ pushm lr
+ lddpc lr, _task_size
+ sub r11, lr, r12
+ mov r9, r11
+ rcall __strnlen_user
+ cp.w r12, r9
+ brgt 1f
+ popm pc
+1: popm pc, r12=0
+
+ /* Literal holding TASK_SIZE, loaded pc-relative above */
+ .align 2
+_task_size:
+ .long TASK_SIZE
+
+ /* Fault handler: a faulting load at label 10 makes the call return 0 */
+ .section .fixup, "ax"
+ .align 1
+19: retal 0
+
+ /* Exception table entry: fault at 10b continues at 19b */
+ .section __ex_table, "a"
+ .align 2
+ .long 10b, 19b
diff --git a/arch/avr32/mach-at32ap/Makefile b/arch/avr32/mach-at32ap/Makefile
new file mode 100644
index 000000000000..f62eb6915510
--- /dev/null
+++ b/arch/avr32/mach-at32ap/Makefile
@@ -0,0 +1,2 @@
+# Code shared by all AT32AP parts is always built; the chip-specific file
+# is built only when its CPU is selected in the configuration.
+obj-y += at32ap.o clock.o pio.o intc.o extint.o hsmc.o
+obj-$(CONFIG_CPU_AT32AP7000) += at32ap7000.o
diff --git a/arch/avr32/mach-at32ap/at32ap.c b/arch/avr32/mach-at32ap/at32ap.c
new file mode 100644
index 000000000000..f7cedf5aabea
--- /dev/null
+++ b/arch/avr32/mach-at32ap/at32ap.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/init.h>
+#include <asm/arch/sm.h>
+
+/* Global System Manager state; set up by at32_sm_init(). */
+struct at32_sm system_manager;
+
+/*
+ * Map the System Manager registers and initialize its lock.
+ *
+ * Returns 0 on success, -ENXIO if at32_sm_device has no memory resource,
+ * or -ENOMEM if the register block cannot be mapped. Failures are logged.
+ */
+static int __init at32_sm_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	struct resource *mem;
+	int err;
+
+	mem = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0);
+	if (!mem) {
+		err = -ENXIO;
+		goto fail;
+	}
+
+	spin_lock_init(&sm->lock);
+	sm->pdev = &at32_sm_device;
+
+	sm->regs = ioremap(mem->start, mem->end - mem->start + 1);
+	if (!sm->regs) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	printk(KERN_ERR "Failed to initialize System Manager: %d\n", err);
+	return err;
+}
+
+/*
+ * Early platform setup: bring up the System Manager, then the clocks
+ * and the port multiplexer, and finally pick USART 1 as serial console.
+ * The order matters because at32_clock_init() reads System Manager
+ * registers. The return value of at32_sm_init() is not checked.
+ */
+void __init setup_platform(void)
+{
+ at32_sm_init();
+ at32_clock_init();
+ at32_portmux_init();
+
+ /* FIXME: This doesn't belong here */
+ at32_setup_serial_console(1);
+}
+
+/*
+ * Probe the Peripheral DMA Controller: look up its "pclk" and "hclk"
+ * clocks and enable both.
+ *
+ * Returns 0 on success or the error encoded in the failing clk_get()
+ * result. If "hclk" is missing, the already obtained "pclk" is released.
+ */
+static int __init pdc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct clk *periph_clk;
+	struct clk *bus_clk;
+
+	periph_clk = clk_get(dev, "pclk");
+	if (IS_ERR(periph_clk)) {
+		dev_err(dev, "no pclk defined\n");
+		return PTR_ERR(periph_clk);
+	}
+
+	bus_clk = clk_get(dev, "hclk");
+	if (IS_ERR(bus_clk)) {
+		dev_err(dev, "no hclk defined\n");
+		clk_put(periph_clk);
+		return PTR_ERR(bus_clk);
+	}
+
+	clk_enable(periph_clk);
+	clk_enable(bus_clk);
+
+	dev_info(dev, "Atmel Peripheral DMA Controller enabled\n");
+	return 0;
+}
+
+/* Platform driver bound by name to the "pdc" platform device. */
+static struct platform_driver pdc_driver = {
+ .probe = pdc_probe,
+ .driver = {
+ .name = "pdc",
+ },
+};
+
+/* Register the PDC driver at arch_initcall time. */
+static int __init pdc_init(void)
+{
+ return platform_driver_register(&pdc_driver);
+}
+arch_initcall(pdc_init);
diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c
new file mode 100644
index 000000000000..37982b60398e
--- /dev/null
+++ b/arch/avr32/mach-at32ap/at32ap7000.c
@@ -0,0 +1,876 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/portmux.h>
+#include <asm/arch/sm.h>
+
+#include "clock.h"
+#include "pio.h"
+#include "sm.h"
+
+/* Memory resource covering the 0x400 bytes starting at base. */
+#define PBMEM(base) \
+ { \
+ .start = base, \
+ .end = base + 0x3ff, \
+ .flags = IORESOURCE_MEM, \
+ }
+/* Interrupt resource for the single interrupt number num. */
+#define IRQ(num) \
+ { \
+ .start = num, \
+ .end = num, \
+ .flags = IORESOURCE_IRQ, \
+ }
+/* Like IRQ(), but the resource also carries a name. */
+#define NAMED_IRQ(num, _name) \
+ { \
+ .start = num, \
+ .end = num, \
+ .name = _name, \
+ .flags = IORESOURCE_IRQ, \
+ }
+
+/*
+ * Define the static platform device <_name><_id>_device. A resource
+ * array called <_name><_id>_resource must already be defined.
+ */
+#define DEFINE_DEV(_name, _id) \
+static struct platform_device _name##_id##_device = { \
+ .name = #_name, \
+ .id = _id, \
+ .resource = _name##_id##_resource, \
+ .num_resources = ARRAY_SIZE(_name##_id##_resource), \
+}
+/*
+ * Same as DEFINE_DEV(), but platform_data additionally points at
+ * <_name><_id>_data, which must already be defined as well.
+ */
+#define DEFINE_DEV_DATA(_name, _id) \
+static struct platform_device _name##_id##_device = { \
+ .name = #_name, \
+ .id = _id, \
+ .dev = { \
+ .platform_data = &_name##_id##_data, \
+ }, \
+ .resource = _name##_id##_resource, \
+ .num_resources = ARRAY_SIZE(_name##_id##_resource), \
+}
+
+/*
+ * Define the clock <devname>_<_name>, owned by <devname>_device and
+ * derived from the bus clock <bus>_clk. Gating and rate are handled by
+ * <bus>_clk_mode() and <bus>_clk_get_rate(); _index is the bit that
+ * <bus>_clk_mode() flips in the clock mask register of that bus.
+ */
+#define DEV_CLK(_name, devname, bus, _index) \
+static struct clk devname##_##_name = { \
+ .name = #_name, \
+ .dev = &devname##_device.dev, \
+ .parent = &bus##_clk, \
+ .mode = bus##_clk_mode, \
+ .get_rate = bus##_clk_get_rate, \
+ .index = _index, \
+}
+
+/* PIO controller identifiers, passed as first argument to portmux_set_func(). */
+enum {
+ PIOA,
+ PIOB,
+ PIOC,
+ PIOD,
+};
+
+/* Peripheral function selectors, passed as last argument to portmux_set_func(). */
+enum {
+ FUNC_A,
+ FUNC_B,
+};
+
+/*
+ * Oscillator frequencies in Hz, indexed by the .index of the osc32k,
+ * osc0 and osc1 clocks (see osc_get_rate()).
+ */
+unsigned long at32ap7000_osc_rates[3] = {
+ [0] = 32768,
+ /* FIXME: these are ATSTK1002-specific */
+ [1] = 20000000,
+ [2] = 12000000,
+};
+
+/* Rate of an oscillator clock: a lookup in at32ap7000_osc_rates[] by clk->index. */
+static unsigned long osc_get_rate(struct clk *clk)
+{
+ return at32ap7000_osc_rates[clk->index];
+}
+
+/*
+ * Compute the output rate of a PLL from its control register value.
+ *
+ * Returns 0 if the PLLEN bit is clear. Otherwise the parent rate is
+ * divided (rounding to nearest) by PLLDIV + 1 and the result multiplied
+ * by PLLMUL + 1.
+ */
+static unsigned long pll_get_rate(struct clk *clk, unsigned long control)
+{
+	unsigned long divider, multiplier, parent_rate;
+
+	if (!(control & SM_BIT(PLLEN)))
+		return 0;
+
+	divider = SM_BFEXT(PLLDIV, control) + 1;
+	multiplier = SM_BFEXT(PLLMUL, control) + 1;
+	parent_rate = clk->parent->get_rate(clk->parent);
+
+	return ((parent_rate + divider / 2) / divider) * multiplier;
+}
+
+/* Rate of PLL0, derived from the current PM_PLL0 register contents. */
+static unsigned long pll0_get_rate(struct clk *clk)
+{
+	u32 pll0_ctrl = sm_readl(&system_manager, PM_PLL0);
+
+	return pll_get_rate(clk, pll0_ctrl);
+}
+
+/* Rate of PLL1, derived from the current PM_PLL1 register contents. */
+static unsigned long pll1_get_rate(struct clk *clk)
+{
+	u32 pll1_ctrl = sm_readl(&system_manager, PM_PLL1);
+
+	return pll_get_rate(clk, pll1_ctrl);
+}
+
+/*
+ * The AT32AP7000 has five primary clock sources: One 32kHz
+ * oscillator, two crystal oscillators and two PLLs.
+ *
+ * For the oscillators, .index selects the entry in
+ * at32ap7000_osc_rates[]. osc32k and osc0 start with one user; osc1 and
+ * the PLLs start with none. Both PLLs default to osc0 as parent;
+ * at32_clock_init() switches a PLL to osc1 when its PLLOSC bit is set.
+ */
+static struct clk osc32k = {
+ .name = "osc32k",
+ .get_rate = osc_get_rate,
+ .users = 1,
+ .index = 0,
+};
+static struct clk osc0 = {
+ .name = "osc0",
+ .get_rate = osc_get_rate,
+ .users = 1,
+ .index = 1,
+};
+static struct clk osc1 = {
+ .name = "osc1",
+ .get_rate = osc_get_rate,
+ .index = 2,
+};
+static struct clk pll0 = {
+ .name = "pll0",
+ .get_rate = pll0_get_rate,
+ .parent = &osc0,
+};
+static struct clk pll1 = {
+ .name = "pll1",
+ .get_rate = pll1_get_rate,
+ .parent = &osc0,
+};
+
+/*
+ * The main clock can be either osc0 or pll0. The boot loader may
+ * have chosen one for us, so we don't really know which one until we
+ * have a look at the SM.
+ *
+ * Set by at32_clock_init(); NULL until then.
+ */
+static struct clk *main_clock;
+
+/*
+ * Synchronous clocks are generated from the main clock. The clocks
+ * must satisfy the constraint
+ *   fCPU >= fHSB >= fPB
+ * i.e. each clock must not be faster than its parent.
+ *
+ * bus_clk_get_rate() returns the main clock rate divided by 2^shift.
+ * The clk argument is not used; it only keeps the call sites uniform.
+ * main_clock must have been set by at32_clock_init() beforehand.
+ */
+static unsigned long bus_clk_get_rate(struct clk *clk, unsigned int shift)
+{
+	return main_clock->get_rate(main_clock) >> shift;
+}
+
+/*
+ * Gate or ungate a module clock in the CPU clock domain: set (enabled
+ * non-zero) or clear bit clk->index of PM_CPU_MASK. The
+ * read-modify-write runs under the System Manager lock with interrupts
+ * disabled.
+ */
+static void cpu_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long irq_state;
+	u32 bit = 1 << clk->index;
+	u32 val;
+
+	spin_lock_irqsave(&sm->lock, irq_state);
+	val = sm_readl(sm, PM_CPU_MASK);
+	val = enabled ? (val | bit) : (val & ~bit);
+	sm_writel(sm, PM_CPU_MASK, val);
+	spin_unlock_irqrestore(&sm->lock, irq_state);
+}
+
+/*
+ * Rate of the CPU clock: the main clock, divided by 2^(CPUSEL + 1) when
+ * the CPUDIV bit of PM_CKSEL is set and undivided otherwise.
+ */
+static unsigned long cpu_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel = sm_readl(&system_manager, PM_CKSEL);
+
+	if (!(cksel & SM_BIT(CPUDIV)))
+		return bus_clk_get_rate(clk, 0);
+
+	return bus_clk_get_rate(clk, SM_BFEXT(CPUSEL, cksel) + 1);
+}
+
+/*
+ * Gate or ungate a module clock in the HSB clock domain: set (enabled
+ * non-zero) or clear bit clk->index of PM_HSB_MASK. The
+ * read-modify-write runs under the System Manager lock with interrupts
+ * disabled.
+ */
+static void hsb_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long irq_state;
+	u32 bit = 1 << clk->index;
+	u32 val;
+
+	spin_lock_irqsave(&sm->lock, irq_state);
+	val = sm_readl(sm, PM_HSB_MASK);
+	val = enabled ? (val | bit) : (val & ~bit);
+	sm_writel(sm, PM_HSB_MASK, val);
+	spin_unlock_irqrestore(&sm->lock, irq_state);
+}
+
+/*
+ * Rate of the HSB clock: the main clock, divided by 2^(HSBSEL + 1) when
+ * the HSBDIV bit of PM_CKSEL is set and undivided otherwise.
+ */
+static unsigned long hsb_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel = sm_readl(&system_manager, PM_CKSEL);
+
+	if (!(cksel & SM_BIT(HSBDIV)))
+		return bus_clk_get_rate(clk, 0);
+
+	return bus_clk_get_rate(clk, SM_BFEXT(HSBSEL, cksel) + 1);
+}
+
+/*
+ * Gate or ungate a module clock in the PBA clock domain: set (enabled
+ * non-zero) or clear bit clk->index of PM_PBA_MASK. The
+ * read-modify-write runs under the System Manager lock with interrupts
+ * disabled.
+ */
+static void pba_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long irq_state;
+	u32 bit = 1 << clk->index;
+	u32 val;
+
+	spin_lock_irqsave(&sm->lock, irq_state);
+	val = sm_readl(sm, PM_PBA_MASK);
+	val = enabled ? (val | bit) : (val & ~bit);
+	sm_writel(sm, PM_PBA_MASK, val);
+	spin_unlock_irqrestore(&sm->lock, irq_state);
+}
+
+/*
+ * Rate of the PBA clock: the main clock, divided by 2^(PBASEL + 1) when
+ * the PBADIV bit of PM_CKSEL is set and undivided otherwise.
+ */
+static unsigned long pba_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel = sm_readl(&system_manager, PM_CKSEL);
+
+	if (!(cksel & SM_BIT(PBADIV)))
+		return bus_clk_get_rate(clk, 0);
+
+	return bus_clk_get_rate(clk, SM_BFEXT(PBASEL, cksel) + 1);
+}
+
+/*
+ * Gate or ungate a module clock in the PBB clock domain: set (enabled
+ * non-zero) or clear bit clk->index of PM_PBB_MASK. The
+ * read-modify-write runs under the System Manager lock with interrupts
+ * disabled.
+ */
+static void pbb_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long irq_state;
+	u32 bit = 1 << clk->index;
+	u32 val;
+
+	spin_lock_irqsave(&sm->lock, irq_state);
+	val = sm_readl(sm, PM_PBB_MASK);
+	val = enabled ? (val | bit) : (val & ~bit);
+	sm_writel(sm, PM_PBB_MASK, val);
+	spin_unlock_irqrestore(&sm->lock, irq_state);
+}
+
+/*
+ * Rate of the PBB clock: the main clock, divided by 2^(PBBSEL + 1) when
+ * the PBBDIV bit of PM_CKSEL is set and undivided otherwise.
+ */
+static unsigned long pbb_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel = sm_readl(&system_manager, PM_CKSEL);
+
+	if (!(cksel & SM_BIT(PBBDIV)))
+		return bus_clk_get_rate(clk, 0);
+
+	return bus_clk_get_rate(clk, SM_BFEXT(PBBSEL, cksel) + 1);
+}
+
+/*
+ * The synchronous bus clocks. cpu_clk and hsb_clk have no mode function
+ * and so cannot be gated. pba_clk and pbb_clk are gated through
+ * hsb_clk_mode(), i.e. through bits 1 and 2 of PM_HSB_MASK. cpu_clk and
+ * pbb_clk start with one user.
+ */
+static struct clk cpu_clk = {
+ .name = "cpu",
+ .get_rate = cpu_clk_get_rate,
+ .users = 1,
+};
+static struct clk hsb_clk = {
+ .name = "hsb",
+ .parent = &cpu_clk,
+ .get_rate = hsb_clk_get_rate,
+};
+static struct clk pba_clk = {
+ .name = "pba",
+ .parent = &hsb_clk,
+ .mode = hsb_clk_mode,
+ .get_rate = pba_clk_get_rate,
+ .index = 1,
+};
+static struct clk pbb_clk = {
+ .name = "pbb",
+ .parent = &hsb_clk,
+ .mode = hsb_clk_mode,
+ .get_rate = pbb_clk_get_rate,
+ .users = 1,
+ .index = 2,
+};
+
+/* --------------------------------------------------------------------
+ * Generic Clock operations
+ * -------------------------------------------------------------------- */
+
+/*
+ * Enable or disable generic clock number clk->index (0..7) by setting
+ * or clearing the CEN bit in its PM_GCCTRL register. Unlike the bus
+ * clock mode functions, this does not take the System Manager lock.
+ */
+static void genclk_mode(struct clk *clk, int enabled)
+{
+	u32 gcctrl;
+
+	BUG_ON(clk->index > 7);
+
+	gcctrl = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+	gcctrl = enabled ? (gcctrl | SM_BIT(CEN)) : (gcctrl & ~SM_BIT(CEN));
+	sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, gcctrl);
+}
+
+/*
+ * Rate of generic clock number clk->index (0..7).
+ *
+ * Returns 0 if the clock has no parent. Otherwise returns the parent
+ * rate, divided by 2 * (DIV + 1) when the DIVEN bit of the clock's
+ * PM_GCCTRL register is set.
+ */
+static unsigned long genclk_get_rate(struct clk *clk)
+{
+	unsigned long divisor = 1;
+	u32 gcctrl;
+
+	BUG_ON(clk->index > 7);
+
+	if (!clk->parent)
+		return 0;
+
+	gcctrl = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+	if (gcctrl & SM_BIT(DIVEN))
+		divisor = 2 * (SM_BFEXT(DIV, gcctrl) + 1);
+
+	return clk->parent->get_rate(clk->parent) / divisor;
+}
+
+/*
+ * Pick the divider setting of generic clock number clk->index (0..7)
+ * that best approximates the requested rate.
+ *
+ * Requests above 3/4 of the parent rate bypass the divider. All other
+ * requests use the divider 2 * (div + 1) with div rounded to nearest.
+ * The register is written only if apply is non-zero.
+ *
+ * Returns the rate that results from the chosen setting, or 0 if the
+ * clock has no parent.
+ *
+ * NOTE(review): rate == 0 reaches the division by 2 * rate below --
+ * confirm that callers never pass 0.
+ */
+static long genclk_set_rate(struct clk *clk, unsigned long rate, int apply)
+{
+	unsigned long parent_rate, actual_rate, div;
+	u32 gcctrl;
+
+	BUG_ON(clk->index > 7);
+
+	if (!clk->parent)
+		return 0;
+
+	parent_rate = clk->parent->get_rate(clk->parent);
+	gcctrl = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+
+	if (rate <= 3 * parent_rate / 4) {
+		div = (parent_rate + rate) / (2 * rate) - 1;
+		gcctrl = SM_BFINS(DIV, div, gcctrl) | SM_BIT(DIVEN);
+		actual_rate = parent_rate / (2 * (div + 1));
+	} else {
+		gcctrl &= ~SM_BIT(DIVEN);
+		actual_rate = parent_rate;
+	}
+
+	printk("clk %s: new rate %lu (actual rate %lu)\n",
+	       clk->name, rate, actual_rate);
+
+	if (apply)
+		sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index,
+			  gcctrl);
+
+	return actual_rate;
+}
+
+/*
+ * Select the source of generic clock number clk->index (0..7).
+ *
+ * Only osc0, osc1, pll0 and pll1 are valid parents. OSCSEL is set for
+ * osc1/pll1 and cleared for osc0/pll0; PLLSEL is set for the two PLLs
+ * and cleared for the two oscillators.
+ *
+ * Returns 0 on success or -EINVAL for any other parent, in which case
+ * neither the register nor clk->parent is changed. The change is logged
+ * before the parent is validated.
+ */
+int genclk_set_parent(struct clk *clk, struct clk *parent)
+{
+	int wants_pll = (parent == &pll0 || parent == &pll1);
+	int wants_osc1_side = (parent == &osc1 || parent == &pll1);
+	int wants_osc0_side = (parent == &osc0 || parent == &pll0);
+	u32 gcctrl;
+
+	BUG_ON(clk->index > 7);
+
+	printk("clk %s: new parent %s (was %s)\n",
+	       clk->name, parent->name,
+	       clk->parent ? clk->parent->name : "(null)");
+
+	gcctrl = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+
+	if (wants_osc1_side)
+		gcctrl |= SM_BIT(OSCSEL);
+	else if (wants_osc0_side)
+		gcctrl &= ~SM_BIT(OSCSEL);
+	else
+		return -EINVAL;
+
+	if (wants_pll)
+		gcctrl |= SM_BIT(PLLSEL);
+	else
+		gcctrl &= ~SM_BIT(PLLSEL);
+
+	sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, gcctrl);
+	clk->parent = parent;
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------
+ * System peripherals
+ * -------------------------------------------------------------------- */
+/*
+ * System Manager: one register block plus three named interrupts.
+ * The device is not static because at32_sm_init() refers to it.
+ */
+static struct resource sm_resource[] = {
+ PBMEM(0xfff00000),
+ NAMED_IRQ(19, "eim"),
+ NAMED_IRQ(20, "pm"),
+ NAMED_IRQ(21, "rtc"),
+};
+struct platform_device at32_sm_device = {
+ .name = "sm",
+ .id = 0,
+ .resource = sm_resource,
+ .num_resources = ARRAY_SIZE(sm_resource),
+};
+DEV_CLK(pclk, at32_sm, pbb, 0);
+
+/* Interrupt controller: register block only, clocked from PBB. */
+static struct resource intc0_resource[] = {
+ PBMEM(0xfff00400),
+};
+struct platform_device at32_intc0_device = {
+ .name = "intc",
+ .id = 0,
+ .resource = intc0_resource,
+ .num_resources = ARRAY_SIZE(intc0_resource),
+};
+DEV_CLK(pclk, at32_intc0, pbb, 1);
+
+/*
+ * Clocks without an owning device. Both are children of the HSB clock
+ * and start with one user, so at32_clock_init() treats them as in use.
+ * Neither sets .index, so both refer to bit 0 of PM_HSB_MASK.
+ */
+static struct clk ebi_clk = {
+ .name = "ebi",
+ .parent = &hsb_clk,
+ .mode = hsb_clk_mode,
+ .get_rate = hsb_clk_get_rate,
+ .users = 1,
+};
+static struct clk hramc_clk = {
+ .name = "hramc",
+ .parent = &hsb_clk,
+ .mode = hsb_clk_mode,
+ .get_rate = hsb_clk_get_rate,
+ .users = 1,
+};
+
+/* Static memory controller: PBB register clock plus HSB master clock. */
+static struct resource smc0_resource[] = {
+ PBMEM(0xfff03400),
+};
+DEFINE_DEV(smc, 0);
+DEV_CLK(pclk, smc0, pbb, 13);
+DEV_CLK(mck, smc0, hsb, 0);
+
+/*
+ * Peripheral DMA controller. The device has no resources; it exists so
+ * that the "pdc" driver can find and enable the two clocks below.
+ */
+static struct platform_device pdc_device = {
+ .name = "pdc",
+ .id = 0,
+};
+DEV_CLK(hclk, pdc, hsb, 4);
+DEV_CLK(pclk, pdc, pba, 16);
+
+/* "pico" clock in the CPU clock domain; starts with one user. */
+static struct clk pico_clk = {
+ .name = "pico",
+ .parent = &cpu_clk,
+ .mode = cpu_clk_mode,
+ .get_rate = cpu_clk_get_rate,
+ .users = 1,
+};
+
+/* --------------------------------------------------------------------
+ * PIO
+ * -------------------------------------------------------------------- */
+
+/*
+ * The four PIO controllers. Each has one register block, one interrupt
+ * (13..16) and one clock in the PBA domain (mask bits 10..13).
+ */
+static struct resource pio0_resource[] = {
+ PBMEM(0xffe02800),
+ IRQ(13),
+};
+DEFINE_DEV(pio, 0);
+DEV_CLK(mck, pio0, pba, 10);
+
+static struct resource pio1_resource[] = {
+ PBMEM(0xffe02c00),
+ IRQ(14),
+};
+DEFINE_DEV(pio, 1);
+DEV_CLK(mck, pio1, pba, 11);
+
+static struct resource pio2_resource[] = {
+ PBMEM(0xffe03000),
+ IRQ(15),
+};
+DEFINE_DEV(pio, 2);
+DEV_CLK(mck, pio2, pba, 12);
+
+static struct resource pio3_resource[] = {
+ PBMEM(0xffe03400),
+ IRQ(16),
+};
+DEFINE_DEV(pio, 3);
+DEV_CLK(mck, pio3, pba, 13);
+
+/*
+ * Register the platform devices every AT32AP7000 system has: System
+ * Manager, interrupt controller, static memory controller, PDC and the
+ * four PIO controllers, in that order. Also records NR_INTERNAL_IRQS as
+ * the first external interrupt number. Registration results are not
+ * checked.
+ */
+void __init at32_add_system_devices(void)
+{
+	struct platform_device *const sysdevs[] = {
+		&at32_sm_device,
+		&at32_intc0_device,
+		&smc0_device,
+		&pdc_device,
+		&pio0_device,
+		&pio1_device,
+		&pio2_device,
+		&pio3_device,
+	};
+	unsigned int n;
+
+	system_manager.eim_first_irq = NR_INTERNAL_IRQS;
+
+	for (n = 0; n < ARRAY_SIZE(sysdevs); n++)
+		platform_device_register(sysdevs[n]);
+}
+
+/* --------------------------------------------------------------------
+ * USART
+ * -------------------------------------------------------------------- */
+
+/*
+ * The four USARTs. Each one needs its own interrupt line and its own
+ * bit in the PBA clock mask: IRQ 6..9 and PBA index 3..6 for USART0..3.
+ *
+ * USART0 used to duplicate the IRQ (7) and PBA clock index (4) of
+ * USART1, which made both devices share one interrupt line and one
+ * clock gate bit.
+ * NOTE(review): the USART0 values (IRQ 6, PBA bit 3) continue the
+ * sequence used by USART1..3 -- confirm against the AT32AP7000
+ * datasheet.
+ */
+static struct resource usart0_resource[] = {
+	PBMEM(0xffe00c00),
+	IRQ(6),
+};
+DEFINE_DEV(usart, 0);
+DEV_CLK(usart, usart0, pba, 3);
+
+static struct resource usart1_resource[] = {
+	PBMEM(0xffe01000),
+	IRQ(7),
+};
+DEFINE_DEV(usart, 1);
+DEV_CLK(usart, usart1, pba, 4);
+
+static struct resource usart2_resource[] = {
+	PBMEM(0xffe01400),
+	IRQ(8),
+};
+DEFINE_DEV(usart, 2);
+DEV_CLK(usart, usart2, pba, 5);
+
+static struct resource usart3_resource[] = {
+	PBMEM(0xffe01800),
+	IRQ(9),
+};
+DEFINE_DEV(usart, 3);
+DEV_CLK(usart, usart3, pba, 6);
+
+/*
+ * Route the RXD and TXD signals of one USART to its pins by selecting
+ * the matching peripheral function in the port multiplexer.
+ */
+static inline void configure_usart0_pins(void)
+{
+ portmux_set_func(PIOA, 8, FUNC_B); /* RXD */
+ portmux_set_func(PIOA, 9, FUNC_B); /* TXD */
+}
+
+static inline void configure_usart1_pins(void)
+{
+ portmux_set_func(PIOA, 17, FUNC_A); /* RXD */
+ portmux_set_func(PIOA, 18, FUNC_A); /* TXD */
+}
+
+static inline void configure_usart2_pins(void)
+{
+ portmux_set_func(PIOB, 26, FUNC_B); /* RXD */
+ portmux_set_func(PIOB, 27, FUNC_B); /* TXD */
+}
+
+/* Note that on USART3 the RXD pin (18) is above the TXD pin (17). */
+static inline void configure_usart3_pins(void)
+{
+ portmux_set_func(PIOB, 18, FUNC_B); /* RXD */
+ portmux_set_func(PIOB, 17, FUNC_B); /* TXD */
+}
+
+/*
+ * Set up the pins of USART number id and return its platform device
+ * without registering it. Returns NULL for any id other than 0..3.
+ */
+static struct platform_device *setup_usart(unsigned int id)
+{
+	switch (id) {
+	case 0:
+		configure_usart0_pins();
+		return &usart0_device;
+	case 1:
+		configure_usart1_pins();
+		return &usart1_device;
+	case 2:
+		configure_usart2_pins();
+		return &usart2_device;
+	case 3:
+		configure_usart3_pins();
+		return &usart3_device;
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Set up and register USART number id. Returns the registered device,
+ * or NULL if there is no such USART. The registration result itself is
+ * not checked.
+ */
+struct platform_device *__init at32_add_device_usart(unsigned int id)
+{
+	struct platform_device *pdev = setup_usart(id);
+
+	if (!pdev)
+		return NULL;
+
+	platform_device_register(pdev);
+	return pdev;
+}
+
+/*
+ * USART chosen as serial console; NULL if none was chosen or the
+ * requested id does not exist.
+ */
+struct platform_device *at91_default_console_device;
+
+/*
+ * Select USART number usart_id as serial console. This sets up the
+ * USART's pins but does not register the device.
+ */
+void __init at32_setup_serial_console(unsigned int usart_id)
+{
+ at91_default_console_device = setup_usart(usart_id);
+}
+
+/* --------------------------------------------------------------------
+ * Ethernet
+ * -------------------------------------------------------------------- */
+
+/*
+ * Ethernet MAC 0. macb0_data is the device's platform data; it is
+ * filled in by at32_add_device_eth() from the data passed by the board
+ * code.
+ */
+static struct eth_platform_data macb0_data;
+static struct resource macb0_resource[] = {
+ PBMEM(0xfff01800),
+ IRQ(25),
+};
+DEFINE_DEV_DATA(macb, 0);
+DEV_CLK(hclk, macb0, hsb, 8);
+DEV_CLK(pclk, macb0, pbb, 6);
+
+/*
+ * Set up the pins of Ethernet MAC number id, copy *data into the
+ * device's platform data and register the device.
+ *
+ * The pins shared by RMII and MII are always configured; the remaining
+ * MII pins only when data->is_rmii is zero. data must not be NULL.
+ *
+ * Returns the registered device, or NULL for any id other than 0.
+ */
+struct platform_device *__init
+at32_add_device_eth(unsigned int id, struct eth_platform_data *data)
+{
+	/* PIOC pins used in both modes, in setup order */
+	static const unsigned int common_pins[] = {
+		3,	/* TXD0 */
+		4,	/* TXD1 */
+		7,	/* TXEN */
+		8,	/* TXCK */
+		9,	/* RXD0 */
+		10,	/* RXD1 */
+		13,	/* RXER */
+		15,	/* RXDV */
+		16,	/* MDC */
+		17,	/* MDIO */
+	};
+	/* Additional PIOC pins used when not in RMII mode */
+	static const unsigned int mii_pins[] = {
+		0,	/* COL */
+		1,	/* CRS */
+		2,	/* TXER */
+		5,	/* TXD2 */
+		6,	/* TXD3 */
+		11,	/* RXD2 */
+		12,	/* RXD3 */
+		14,	/* RXCK */
+		18,	/* SPD */
+	};
+	struct platform_device *pdev;
+	unsigned int n;
+
+	if (id != 0)
+		return NULL;
+
+	pdev = &macb0_device;
+
+	for (n = 0; n < ARRAY_SIZE(common_pins); n++)
+		portmux_set_func(PIOC, common_pins[n], FUNC_A);
+
+	if (!data->is_rmii) {
+		for (n = 0; n < ARRAY_SIZE(mii_pins); n++)
+			portmux_set_func(PIOC, mii_pins[n], FUNC_A);
+	}
+
+	memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data));
+	platform_device_register(pdev);
+
+	return pdev;
+}
+
+/* --------------------------------------------------------------------
+ * SPI
+ * -------------------------------------------------------------------- */
+/* SPI controller 0: register block, interrupt 3, clock on PBA mask bit 0. */
+static struct resource spi0_resource[] = {
+ PBMEM(0xffe00000),
+ IRQ(3),
+};
+DEFINE_DEV(spi, 0);
+DEV_CLK(mck, spi0, pba, 0);
+
+/*
+ * Set up the pins of SPI controller number id and register the device.
+ * Returns the registered device, or NULL for any id other than 0.
+ */
+struct platform_device *__init at32_add_device_spi(unsigned int id)
+{
+	struct platform_device *pdev;
+	unsigned int pin;
+
+	if (id != 0)
+		return NULL;
+
+	pdev = &spi0_device;
+
+	/* PIOA 0..5: MISO, MOSI, SCK, NPCS0, NPCS1, NPCS2 */
+	for (pin = 0; pin <= 5; pin++)
+		portmux_set_func(PIOA, pin, FUNC_A);
+
+	platform_device_register(pdev);
+	return pdev;
+}
+
+/* --------------------------------------------------------------------
+ * LCDC
+ * -------------------------------------------------------------------- */
+/*
+ * LCD controller 0. Its register window is 0x1000 bytes, so PBMEM()
+ * (which covers 0x400 bytes) is not used here. lcdc0_data is filled in
+ * by at32_add_device_lcdc().
+ */
+static struct lcdc_platform_data lcdc0_data;
+static struct resource lcdc0_resource[] = {
+ {
+ .start = 0xff000000,
+ .end = 0xff000fff,
+ .flags = IORESOURCE_MEM,
+ },
+ IRQ(1),
+};
+DEFINE_DEV_DATA(lcdc, 0);
+DEV_CLK(hclk, lcdc0, hsb, 7);
+/*
+ * Pixel clock: generic clock number 7. It has no parent until
+ * genclk_set_parent() assigns one.
+ */
+static struct clk lcdc0_pixclk = {
+ .name = "pixclk",
+ .dev = &lcdc0_device.dev,
+ .mode = genclk_mode,
+ .get_rate = genclk_get_rate,
+ .set_rate = genclk_set_rate,
+ .set_parent = genclk_set_parent,
+ .index = 7,
+};
+
+/*
+ * Set up the pins and the pixel clock of LCD controller number id, copy
+ * *data into the device's platform data and register the device.
+ *
+ * The pixel clock is parented to pll0 and asked to run at pll0's rate.
+ * data must not be NULL.
+ *
+ * Returns the registered device, or NULL for any id other than 0.
+ */
+struct platform_device *__init
+at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data)
+{
+	struct platform_device *pdev;
+	unsigned int pin;
+
+	if (id != 0)
+		return NULL;
+
+	pdev = &lcdc0_device;
+
+	/*
+	 * PIOC 19..25: CC, HSYNC, PCLK, VSYNC, DVAL, MODE, PWR
+	 * PIOC 26..31: DATA0..DATA5
+	 */
+	for (pin = 19; pin <= 31; pin++)
+		portmux_set_func(PIOC, pin, FUNC_A);
+
+	/* PIOD 0..17: DATA6..DATA23 */
+	for (pin = 0; pin <= 17; pin++)
+		portmux_set_func(PIOD, pin, FUNC_A);
+
+	clk_set_parent(&lcdc0_pixclk, &pll0);
+	clk_set_rate(&lcdc0_pixclk, clk_get_rate(&pll0));
+
+	memcpy(pdev->dev.platform_data, data,
+	       sizeof(struct lcdc_platform_data));
+
+	platform_device_register(pdev);
+	return pdev;
+}
+
+/*
+ * Every clock known on this chip. clk_get() searches this list by
+ * device and name, and at32_clock_init() walks it to build the initial
+ * clock masks. A clock that is not listed here cannot be found by
+ * either.
+ */
+struct clk *at32_clock_list[] = {
+ &osc32k,
+ &osc0,
+ &osc1,
+ &pll0,
+ &pll1,
+ &cpu_clk,
+ &hsb_clk,
+ &pba_clk,
+ &pbb_clk,
+ &at32_sm_pclk,
+ &at32_intc0_pclk,
+ &ebi_clk,
+ &hramc_clk,
+ &smc0_pclk,
+ &smc0_mck,
+ &pdc_hclk,
+ &pdc_pclk,
+ &pico_clk,
+ &pio0_mck,
+ &pio1_mck,
+ &pio2_mck,
+ &pio3_mck,
+ &usart0_usart,
+ &usart1_usart,
+ &usart2_usart,
+ &usart3_usart,
+ &macb0_hclk,
+ &macb0_pclk,
+ &spi0_mck,
+ &lcdc0_hclk,
+ &lcdc0_pixclk,
+};
+/* Number of entries in at32_clock_list[]. */
+unsigned int at32_nr_clocks = ARRAY_SIZE(at32_clock_list);
+
+/* Initialize the port multiplexer by calling at32_init_pio() for all four PIO controllers. */
+void __init at32_portmux_init(void)
+{
+ at32_init_pio(&pio0_device);
+ at32_init_pio(&pio1_device);
+ at32_init_pio(&pio2_device);
+ at32_init_pio(&pio3_device);
+}
+
+/*
+ * Bring the clock tree in line with the hardware state left by the
+ * boot loader and program the initial module clock masks.
+ *
+ * - main_clock becomes pll0 or osc0, depending on PLLSEL in PM_MCCTRL.
+ * - A PLL whose PLLOSC bit is set is re-parented to osc1.
+ * - Each of the four clock mask registers is written with exactly the
+ *   bits of those listed clocks that already have a user; every other
+ *   module clock ends up gated.
+ *
+ * Must run after at32_sm_init(), since it accesses the System Manager.
+ */
+void __init at32_clock_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	u32 cpu_mask = 0, hsb_mask = 0, pba_mask = 0, pbb_mask = 0;
+	int i;
+
+	if (sm_readl(sm, PM_MCCTRL) & SM_BIT(PLLSEL))
+		main_clock = &pll0;
+	else
+		main_clock = &osc0;
+
+	if (sm_readl(sm, PM_PLL0) & SM_BIT(PLLOSC))
+		pll0.parent = &osc1;
+	if (sm_readl(sm, PM_PLL1) & SM_BIT(PLLOSC))
+		pll1.parent = &osc1;
+
+	/*
+	 * Turn on all clocks that have at least one user already, and
+	 * turn off everything else. We only do this for module
+	 * clocks, and even though it isn't particularly pretty to
+	 * check the address of the mode function, it should do the
+	 * trick...
+	 */
+	for (i = 0; i < ARRAY_SIZE(at32_clock_list); i++) {
+		struct clk *clk = at32_clock_list[i];
+
+		/* Unused clocks contribute no mask bit and so get gated */
+		if (clk->users == 0)
+			continue;
+
+		if (clk->mode == &cpu_clk_mode)
+			cpu_mask |= 1 << clk->index;
+		else if (clk->mode == &hsb_clk_mode)
+			hsb_mask |= 1 << clk->index;
+		else if (clk->mode == &pba_clk_mode)
+			pba_mask |= 1 << clk->index;
+		else if (clk->mode == &pbb_clk_mode)
+			pbb_mask |= 1 << clk->index;
+	}
+
+	sm_writel(sm, PM_CPU_MASK, cpu_mask);
+	sm_writel(sm, PM_HSB_MASK, hsb_mask);
+	sm_writel(sm, PM_PBA_MASK, pba_mask);
+	sm_writel(sm, PM_PBB_MASK, pbb_mask);
+}
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
new file mode 100644
index 000000000000..3d0d1097389f
--- /dev/null
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -0,0 +1,148 @@
+/*
+ * Clock management for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Based on arch/arm/mach-at91rm9200/clock.c
+ * Copyright (C) 2005 David Brownell
+ * Copyright (C) 2005 Ivan Kokshaysky
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/string.h>
+
+#include "clock.h"
+
+/*
+ * Serializes the user counts, mode callbacks and rate/parent callbacks
+ * of all clocks. Defined with DEFINE_SPINLOCK() rather than the
+ * deprecated SPIN_LOCK_UNLOCKED initializer.
+ */
+static DEFINE_SPINLOCK(clk_lock);
+
+/*
+ * Look up a clock by consumer device and name.
+ *
+ * Walks at32_clock_list and returns the first clock whose dev pointer
+ * equals @dev and whose name equals @id. Returns ERR_PTR(-ENOENT) when
+ * no entry matches.
+ */
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	int idx = 0;
+
+	while (idx < at32_nr_clocks) {
+		struct clk *candidate = at32_clock_list[idx++];
+
+		if (candidate->dev != dev)
+			continue;
+		if (!strcmp(id, candidate->name))
+			return candidate;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(clk_get);
+
+/*
+ * Release a clock obtained with clk_get(). Intentionally a no-op:
+ * clk_get() hands out pointers into the static clock list, so there is
+ * nothing to free or unreference.
+ */
+void clk_put(struct clk *clk)
+{
+ /* clocks are static for now, we can't free them */
+}
+EXPORT_SYMBOL(clk_put);
+
+/*
+ * Take one reference on @clk and on every ancestor, parents first.
+ * A clock's mode callback (if it has one) is invoked with enabled == 1
+ * only when its user count goes from zero to one. Called with clk_lock
+ * held by clk_enable().
+ */
+static void __clk_enable(struct clk *clk)
+{
+	int first_user;
+
+	if (clk->parent)
+		__clk_enable(clk->parent);
+
+	first_user = (clk->users == 0);
+	clk->users++;
+
+	if (first_user && clk->mode)
+		clk->mode(clk, 1);
+}
+
+/*
+ * Enable @clk and all of its ancestors, counting this caller as a user.
+ * The work is done by __clk_enable() under clk_lock with interrupts
+ * disabled. Always returns 0.
+ */
+int clk_enable(struct clk *clk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clk_lock, flags);
+ __clk_enable(clk);
+ spin_unlock_irqrestore(&clk_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(clk_enable);
+
+/*
+ * Drop one reference on @clk and then on each ancestor in turn, child
+ * first. A clock's mode callback (if it has one) is invoked with
+ * enabled == 0 when its user count reaches zero. Dropping a reference
+ * on a clock that has no users is a BUG. Called with clk_lock held by
+ * clk_disable().
+ */
+static void __clk_disable(struct clk *clk)
+{
+	for (; clk; clk = clk->parent) {
+		BUG_ON(clk->users == 0);
+
+		clk->users--;
+		if (clk->users == 0 && clk->mode)
+			clk->mode(clk, 0);
+	}
+}
+
+/*
+ * Drop this caller's reference on @clk and its ancestors. The work is
+ * done by __clk_disable() under clk_lock with interrupts disabled.
+ */
+void clk_disable(struct clk *clk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clk_lock, flags);
+ __clk_disable(clk);
+ spin_unlock_irqrestore(&clk_lock, flags);
+}
+EXPORT_SYMBOL(clk_disable);
+
+/*
+ * Return the rate reported by the clock's get_rate callback, queried
+ * under clk_lock.
+ *
+ * NOTE(review): unlike set_rate and set_parent, get_rate is called
+ * without a NULL check -- presumably every clock provides one; confirm
+ * against the clock definitions.
+ */
+unsigned long clk_get_rate(struct clk *clk)
+{
+ unsigned long flags;
+ unsigned long rate;
+
+ spin_lock_irqsave(&clk_lock, flags);
+ rate = clk->get_rate(clk);
+ spin_unlock_irqrestore(&clk_lock, flags);
+
+ return rate;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+/*
+ * Ask what rate @clk would actually run at if @rate were requested.
+ *
+ * Calls the clock's set_rate callback with apply == 0 under clk_lock
+ * and returns whatever it reports. Returns -ENOSYS when the clock has
+ * no set_rate callback.
+ */
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long irq_state;
+	unsigned long rounded;
+
+	if (clk->set_rate == NULL)
+		return -ENOSYS;
+
+	spin_lock_irqsave(&clk_lock, irq_state);
+	rounded = clk->set_rate(clk, rate, 0);
+	spin_unlock_irqrestore(&clk_lock, irq_state);
+
+	return rounded;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+/*
+ * Change the rate of @clk.
+ *
+ * Calls the clock's set_rate callback with apply == 1 under clk_lock.
+ * A negative result from the callback is passed on as the error code;
+ * any other result is reported as 0. Returns -ENOSYS when the clock
+ * has no set_rate callback.
+ */
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long irq_state;
+	long result;
+
+	if (clk->set_rate == NULL)
+		return -ENOSYS;
+
+	spin_lock_irqsave(&clk_lock, irq_state);
+	result = clk->set_rate(clk, rate, 1);
+	spin_unlock_irqrestore(&clk_lock, irq_state);
+
+	if (result < 0)
+		return result;
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+/*
+ * Reparent @clk to @parent by calling the clock's set_parent callback
+ * under clk_lock and returning its result. Returns -ENOSYS when the
+ * clock has no set_parent callback.
+ */
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+ unsigned long flags;
+ int ret;
+
+ if (!clk->set_parent)
+ return -ENOSYS;
+
+ spin_lock_irqsave(&clk_lock, flags);
+ ret = clk->set_parent(clk, parent);
+ spin_unlock_irqrestore(&clk_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(clk_set_parent);
+
+/* Return the current parent of @clk (NULL if it has none); read without taking clk_lock. */
+struct clk *clk_get_parent(struct clk *clk)
+{
+ return clk->parent;
+}
+EXPORT_SYMBOL(clk_get_parent);
diff --git a/arch/avr32/mach-at32ap/clock.h b/arch/avr32/mach-at32ap/clock.h
new file mode 100644
index 000000000000..f953f044ba4d
--- /dev/null
+++ b/arch/avr32/mach-at32ap/clock.h
@@ -0,0 +1,30 @@
+/*
+ * Clock management for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Based on arch/arm/mach-at91rm9200/clock.c
+ * Copyright (C) 2005 David Brownell
+ * Copyright (C) 2005 Ivan Kokshaysky
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ARCH_AVR32_MACH_AT32AP_CLOCK_H__
+#define __ARCH_AVR32_MACH_AT32AP_CLOCK_H__
+
+#include <linux/clk.h>
+#include <linux/types.h>
+
+/*
+ * One node of the clock tree.
+ *
+ * The callbacks are optional unless noted otherwise by their callers in
+ * clock.c: mode, set_rate and set_parent are checked for NULL before
+ * use, get_rate is called unconditionally by clk_get_rate().
+ */
+struct clk {
+	const char *name;		/* Clock name/function */
+	struct device *dev;		/* Device the clock is used by */
+	struct clk *parent;		/* Parent clock, if any */
+	/* Called with enabled == 1 on first enable, 0 on last disable */
+	void (*mode)(struct clk *clk, int enabled);
+	unsigned long (*get_rate)(struct clk *clk);
+	/* apply == 0: only report the resulting rate; apply == 1: set it */
+	long (*set_rate)(struct clk *clk, unsigned long rate,
+			 int apply);
+	int (*set_parent)(struct clk *clk, struct clk *parent);
+	u16 users;			/* Enabled if non-zero */
+	u16 index;			/* Sibling index */
+};
+
+/* Table of all clocks and its length, searched by clk_get() */
+extern struct clk *at32_clock_list[];
+extern unsigned int at32_nr_clocks;
+
+#endif /* __ARCH_AVR32_MACH_AT32AP_CLOCK_H__ */
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
new file mode 100644
index 000000000000..7da9c5f7a0eb
--- /dev/null
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -0,0 +1,171 @@
+/*
+ * External interrupt handling for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/random.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/sm.h>
+
+#include "sm.h"
+
+/* irq_chip .ack: write the bit of external interrupt @irq to EIM_ICR. */
+static void eim_ack_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	u32 line_bit = 1 << (irq - sm->eim_first_irq);
+
+	sm_writel(sm, EIM_ICR, line_bit);
+}
+
+/* irq_chip .mask: write the bit of external interrupt @irq to EIM_IDR. */
+static void eim_mask_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	u32 line_bit = 1 << (irq - sm->eim_first_irq);
+
+	sm_writel(sm, EIM_IDR, line_bit);
+}
+
+/*
+ * irq_chip .mask_ack: write the bit of external interrupt @irq first
+ * to EIM_ICR and then to EIM_IDR.
+ */
+static void eim_mask_ack_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	u32 line_bit = 1 << (irq - sm->eim_first_irq);
+
+	sm_writel(sm, EIM_ICR, line_bit);
+	sm_writel(sm, EIM_IDR, line_bit);
+}
+
+/* irq_chip .unmask: write the bit of external interrupt @irq to EIM_IER. */
+static void eim_unmask_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	u32 line_bit = 1 << (irq - sm->eim_first_irq);
+
+	sm_writel(sm, EIM_IER, line_bit);
+}
+
+/*
+ * irq_chip .set_type: select the trigger type of external interrupt
+ * @irq.
+ *
+ * The line's bit in EIM_MODE is set for the two level types and cleared
+ * for the two edge types. For level types the bit in EIM_LEVEL picks
+ * high (set) or low (clear); for edge types the bit in EIM_EDGE picks
+ * rising (set) or falling (clear).
+ *
+ * Returns 0 on success. Any other flow type (including combinations of
+ * the four above) yields -EINVAL and leaves the registers untouched.
+ */
+static int eim_set_irq_type(unsigned int irq, unsigned int flow_type)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	u32 bit = 1 << (irq - sm->eim_first_irq);
+	u32 mode, edge, level;
+	unsigned long flags;
+	int ret = 0;
+
+	flow_type &= IRQ_TYPE_SENSE_MASK;
+
+	/* The three registers are updated read-modify-write under sm->lock */
+	spin_lock_irqsave(&sm->lock, flags);
+
+	mode = sm_readl(sm, EIM_MODE);
+	edge = sm_readl(sm, EIM_EDGE);
+	level = sm_readl(sm, EIM_LEVEL);
+
+	switch (flow_type) {
+	case IRQ_TYPE_LEVEL_LOW:
+		mode |= bit;
+		level &= ~bit;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		mode |= bit;
+		level |= bit;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+		mode &= ~bit;
+		edge |= bit;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		mode &= ~bit;
+		edge &= ~bit;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	/* Only touch the hardware when the request was valid */
+	if (ret == 0) {
+		sm_writel(sm, EIM_MODE, mode);
+		sm_writel(sm, EIM_EDGE, edge);
+		sm_writel(sm, EIM_LEVEL, level);
+	}
+
+	spin_unlock_irqrestore(&sm->lock, flags);
+
+	return ret;
+}
+
+/*
+ * irq_chip for the external interrupt lines; eim_init() installs it on
+ * every implemented line and stores its address in sm->eim_chip.
+ */
+struct irq_chip eim_chip = {
+ .name = "eim",
+ .ack = eim_ack_irq,
+ .mask = eim_mask_irq,
+ .mask_ack = eim_mask_ack_irq,
+ .unmask = eim_unmask_irq,
+ .set_type = eim_set_irq_type,
+};
+
+/*
+ * Chained handler for the EIM interrupt (installed by eim_init()).
+ *
+ * Reads EIM_ISR, keeps only the lines that are also set in EIM_IMR, and
+ * calls the flow handler of each pending external IRQ, highest line
+ * number first. @desc->handler_data is the struct at32_sm registered
+ * with set_irq_data() in eim_init().
+ *
+ * NOTE(review): sm->lock is held across the handler calls, and
+ * eim_set_irq_type() takes the same lock -- confirm no handler can
+ * reach it from here.
+ */
+static void demux_eim_irq(unsigned int irq, struct irq_desc *desc,
+ struct pt_regs *regs)
+{
+ struct at32_sm *sm = desc->handler_data;
+ struct irq_desc *ext_desc;
+ unsigned long status, pending;
+ unsigned int i, ext_irq;
+
+ spin_lock(&sm->lock);
+
+ status = sm_readl(sm, EIM_ISR);
+ pending = status & sm_readl(sm, EIM_IMR);
+
+ while (pending) {
+ /* fls() is 1-based: pick the highest pending line and clear it */
+ i = fls(pending) - 1;
+ pending &= ~(1 << i);
+
+ /* translate the line number into its Linux IRQ number */
+ ext_irq = i + sm->eim_first_irq;
+ ext_desc = irq_desc + ext_irq;
+ ext_desc->handle_irq(ext_irq, ext_desc, regs);
+ }
+
+ spin_unlock(&sm->lock);
+}
+
+/*
+ * Set up the External Interrupt Module: count the implemented lines,
+ * attach eim_chip to each of them, and chain demux_eim_irq() onto the
+ * "eim" interrupt of the System Manager platform device. Runs as an
+ * arch_initcall and always returns 0.
+ */
+static int __init eim_init(void)
+{
+ struct at32_sm *sm = &system_manager;
+ unsigned int i;
+ unsigned int nr_irqs;
+ unsigned int int_irq;
+ u32 pattern;
+
+ /*
+ * The EIM is really the same module as SM, so register
+ * mapping, etc. has been taken care of already.
+ */
+
+ /*
+ * Find out how many interrupt lines are actually implemented
+ * in hardware: write all-ones to EIM_IDR, then all-ones to
+ * EIM_MODE, and read EIM_MODE back. The position of the
+ * highest bit that reads back as set is taken as the line
+ * count.
+ */
+ sm_writel(sm, EIM_IDR, ~0UL);
+ sm_writel(sm, EIM_MODE, ~0UL);
+ pattern = sm_readl(sm, EIM_MODE);
+ nr_irqs = fls(pattern);
+
+ sm->eim_chip = &eim_chip;
+
+ /* the chip data is what the eim_*_irq() callbacks read back */
+ for (i = 0; i < nr_irqs; i++) {
+ set_irq_chip(sm->eim_first_irq + i, &eim_chip);
+ set_irq_chip_data(sm->eim_first_irq + i, sm);
+ }
+
+ /* NOTE(review): the lookup result is used without an error check */
+ int_irq = platform_get_irq_byname(sm->pdev, "eim");
+
+ set_irq_chained_handler(int_irq, demux_eim_irq);
+ set_irq_data(int_irq, sm);
+
+ printk("EIM: External Interrupt Module at 0x%p, IRQ %u\n",
+ sm->regs, int_irq);
+ printk("EIM: Handling %u external IRQs, starting with IRQ %u\n",
+ nr_irqs, sm->eim_first_irq);
+
+ return 0;
+}
+arch_initcall(eim_init);
diff --git a/arch/avr32/mach-at32ap/hsmc.c b/arch/avr32/mach-at32ap/hsmc.c
new file mode 100644
index 000000000000..7691721928a7
--- /dev/null
+++ b/arch/avr32/mach-at32ap/hsmc.c
@@ -0,0 +1,164 @@
+/*
+ * Static Memory Controller for AT32 chips
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define DEBUG
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/arch/smc.h>
+
+#include "hsmc.h"
+
+#define NR_CHIP_SELECTS 6
+
+/* State of the one Static Memory Controller instance handled by this driver. */
+struct hsmc {
+ void __iomem *regs; /* ioremap()ed register block */
+ struct clk *pclk; /* "pclk" clock of the device */
+ struct clk *mck; /* "mck" clock; its rate is used for the timing conversion */
+};
+
+/* The probed controller, or NULL while none is available. */
+static struct hsmc *hsmc;
+
+/*
+ * Program the setup, pulse, cycle and mode registers of one SMC chip
+ * select.
+ *
+ * @cs:     chip select number, 0 .. NR_CHIP_SELECTS - 1
+ * @config: timings (converted to mck cycles with ns2cyc(), rounding
+ *          up) plus bus width and access mode flags
+ *
+ * Returns 0 on success, -ENODEV when no controller has been probed yet,
+ * and -EINVAL when @cs is out of range or config->bus_width is not
+ * 1, 2 or 4. Nothing is written to the hardware on error.
+ */
+int smc_set_configuration(int cs, const struct smc_config *config)
+{
+	unsigned long mul;
+	unsigned long offset;
+	u32 setup, pulse, cycle, mode;
+
+	if (!hsmc)
+		return -ENODEV;
+	/*
+	 * cs is signed, so the lower bound must be checked as well: a
+	 * negative value would address registers below the SMC block.
+	 */
+	if (cs < 0 || cs >= NR_CHIP_SELECTS)
+		return -EINVAL;
+
+	/*
+	 * cycles = x / T = x * f
+	 *   = ((x * 1000000000) * ((f * 65536) / 1000000000)) / 65536
+	 *   = ((x * 1000000000) * (((f / 10000) * 65536) / 100000)) / 65536
+	 */
+	mul = (clk_get_rate(hsmc->mck) / 10000) << 16;
+	mul /= 100000;
+
+	/* 16.16 fixed-point multiply, rounded up to a whole cycle */
+#define ns2cyc(x) ((((x) * mul) + 65535) >> 16)
+
+	setup = (HSMC_BF(NWE_SETUP, ns2cyc(config->nwe_setup))
+		 | HSMC_BF(NCS_WR_SETUP, ns2cyc(config->ncs_write_setup))
+		 | HSMC_BF(NRD_SETUP, ns2cyc(config->nrd_setup))
+		 | HSMC_BF(NCS_RD_SETUP, ns2cyc(config->ncs_read_setup)));
+	pulse = (HSMC_BF(NWE_PULSE, ns2cyc(config->nwe_pulse))
+		 | HSMC_BF(NCS_WR_PULSE, ns2cyc(config->ncs_write_pulse))
+		 | HSMC_BF(NRD_PULSE, ns2cyc(config->nrd_pulse))
+		 | HSMC_BF(NCS_RD_PULSE, ns2cyc(config->ncs_read_pulse)));
+	cycle = (HSMC_BF(NWE_CYCLE, ns2cyc(config->write_cycle))
+		 | HSMC_BF(NRD_CYCLE, ns2cyc(config->read_cycle)));
+
+#undef ns2cyc
+
+	/* bus_width is given in bytes */
+	switch (config->bus_width) {
+	case 1:
+		mode = HSMC_BF(DBW, HSMC_DBW_8_BITS);
+		break;
+	case 2:
+		mode = HSMC_BF(DBW, HSMC_DBW_16_BITS);
+		break;
+	case 4:
+		mode = HSMC_BF(DBW, HSMC_DBW_32_BITS);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (config->nrd_controlled)
+		mode |= HSMC_BIT(READ_MODE);
+	if (config->nwe_controlled)
+		mode |= HSMC_BIT(WRITE_MODE);
+	if (config->byte_write)
+		mode |= HSMC_BIT(BAT);
+
+	pr_debug("smc cs%d: setup/%08x pulse/%08x cycle/%08x mode/%08x\n",
+		 cs, setup, pulse, cycle, mode);
+
+	/* each chip select owns a 0x10 byte group of four registers */
+	offset = cs * 0x10;
+	hsmc_writel(hsmc, SETUP0 + offset, setup);
+	hsmc_writel(hsmc, PULSE0 + offset, pulse);
+	hsmc_writel(hsmc, CYCLE0 + offset, cycle);
+	hsmc_writel(hsmc, MODE0 + offset, mode);
+	hsmc_readl(hsmc, MODE0); /* I/O barrier */
+
+	return 0;
+}
+EXPORT_SYMBOL(smc_set_configuration);
+
+/*
+ * Bind the single SMC instance: look up its register resource and its
+ * "pclk" and "mck" clocks, enable both clocks and map the registers.
+ *
+ * The controller is built in a local variable and only published
+ * through the global hsmc pointer once it is fully set up, so
+ * smc_set_configuration() never sees a controller without mapped
+ * registers.
+ *
+ * Returns 0 on success, -EBUSY if a controller is already bound,
+ * -ENXIO if the memory resource is missing, the clk_get() error if a
+ * clock cannot be found, and -ENOMEM if allocating or mapping fails.
+ */
+static int hsmc_probe(struct platform_device *pdev)
+{
+	struct resource *regs;
+	struct clk *pclk, *mck;
+	struct hsmc *smc;
+	int ret;
+
+	if (hsmc)
+		return -EBUSY;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs)
+		return -ENXIO;
+	pclk = clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(pclk))
+		return PTR_ERR(pclk);
+	mck = clk_get(&pdev->dev, "mck");
+	if (IS_ERR(mck)) {
+		ret = PTR_ERR(mck);
+		goto out_put_pclk;
+	}
+
+	/* also the error code reported if ioremap() fails below */
+	ret = -ENOMEM;
+	smc = kzalloc(sizeof(*smc), GFP_KERNEL);
+	if (!smc)
+		goto out_put_clocks;
+
+	clk_enable(pclk);
+	clk_enable(mck);
+
+	smc->pclk = pclk;
+	smc->mck = mck;
+	smc->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!smc->regs)
+		goto out_disable_clocks;
+
+	dev_info(&pdev->dev, "Atmel Static Memory Controller at 0x%08lx\n",
+		 (unsigned long)regs->start);
+
+	platform_set_drvdata(pdev, smc);
+
+	/* fully initialized: make it visible to smc_set_configuration() */
+	hsmc = smc;
+
+	return 0;
+
+out_disable_clocks:
+	clk_disable(mck);
+	clk_disable(pclk);
+	kfree(smc);
+out_put_clocks:
+	clk_put(mck);
+out_put_pclk:
+	clk_put(pclk);
+	return ret;
+}
+
+/* Platform driver matched by name against "smc" devices; probe only, no remove callback. */
+static struct platform_driver hsmc_driver = {
+ .probe = hsmc_probe,
+ .driver = {
+ .name = "smc",
+ },
+};
+
+/* Register the SMC platform driver at arch_initcall time. */
+static int __init hsmc_init(void)
+{
+ return platform_driver_register(&hsmc_driver);
+}
+arch_initcall(hsmc_init);
diff --git a/arch/avr32/mach-at32ap/hsmc.h b/arch/avr32/mach-at32ap/hsmc.h
new file mode 100644
index 000000000000..5681276fafdb
--- /dev/null
+++ b/arch/avr32/mach-at32ap/hsmc.h
@@ -0,0 +1,127 @@
+/*
+ * Register definitions for Atmel Static Memory Controller (SMC)
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_HSMC_H__
+#define __ASM_AVR32_HSMC_H__
+
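+/*
+ * HSMC register offsets.
+ *
+ * Each chip select n has its own SETUPn/PULSEn/CYCLEn/MODEn group,
+ * located 0x10 bytes after the group of chip select n - 1.
+ */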
+#define HSMC_SETUP0 0x0000
+#define HSMC_PULSE0 0x0004
+#define HSMC_CYCLE0 0x0008
+#define HSMC_MODE0 0x000c
+#define HSMC_SETUP1 0x0010
+#define HSMC_PULSE1 0x0014
+#define HSMC_CYCLE1 0x0018
+#define HSMC_MODE1 0x001c
+#define HSMC_SETUP2 0x0020
+#define HSMC_PULSE2 0x0024
+#define HSMC_CYCLE2 0x0028
+#define HSMC_MODE2 0x002c
+#define HSMC_SETUP3 0x0030
+#define HSMC_PULSE3 0x0034
+#define HSMC_CYCLE3 0x0038
+#define HSMC_MODE3 0x003c
+#define HSMC_SETUP4 0x0040
+#define HSMC_PULSE4 0x0044
+#define HSMC_CYCLE4 0x0048
+#define HSMC_MODE4 0x004c
+#define HSMC_SETUP5 0x0050
+#define HSMC_PULSE5 0x0054
+#define HSMC_CYCLE5 0x0058
+#define HSMC_MODE5 0x005c
+
+/* Bitfields in SETUP0 */
+#define HSMC_NWE_SETUP_OFFSET 0
+#define HSMC_NWE_SETUP_SIZE 6
+#define HSMC_NCS_WR_SETUP_OFFSET 8
+#define HSMC_NCS_WR_SETUP_SIZE 6
+#define HSMC_NRD_SETUP_OFFSET 16
+#define HSMC_NRD_SETUP_SIZE 6
+#define HSMC_NCS_RD_SETUP_OFFSET 24
+#define HSMC_NCS_RD_SETUP_SIZE 6
+
+/* Bitfields in PULSE0 */
+#define HSMC_NWE_PULSE_OFFSET 0
+#define HSMC_NWE_PULSE_SIZE 7
+#define HSMC_NCS_WR_PULSE_OFFSET 8
+#define HSMC_NCS_WR_PULSE_SIZE 7
+#define HSMC_NRD_PULSE_OFFSET 16
+#define HSMC_NRD_PULSE_SIZE 7
+#define HSMC_NCS_RD_PULSE_OFFSET 24
+#define HSMC_NCS_RD_PULSE_SIZE 7
+
+/* Bitfields in CYCLE0 */
+#define HSMC_NWE_CYCLE_OFFSET 0
+#define HSMC_NWE_CYCLE_SIZE 9
+#define HSMC_NRD_CYCLE_OFFSET 16
+#define HSMC_NRD_CYCLE_SIZE 9
+
+/* Bitfields in MODE0 */
+#define HSMC_READ_MODE_OFFSET 0
+#define HSMC_READ_MODE_SIZE 1
+#define HSMC_WRITE_MODE_OFFSET 1
+#define HSMC_WRITE_MODE_SIZE 1
+#define HSMC_EXNW_MODE_OFFSET 4
+#define HSMC_EXNW_MODE_SIZE 2
+#define HSMC_BAT_OFFSET 8
+#define HSMC_BAT_SIZE 1
+#define HSMC_DBW_OFFSET 12
+#define HSMC_DBW_SIZE 2
+#define HSMC_TDF_CYCLES_OFFSET 16
+#define HSMC_TDF_CYCLES_SIZE 4
+#define HSMC_TDF_MODE_OFFSET 20
+#define HSMC_TDF_MODE_SIZE 1
+#define HSMC_PMEN_OFFSET 24
+#define HSMC_PMEN_SIZE 1
+#define HSMC_PS_OFFSET 28
+#define HSMC_PS_SIZE 2
+
+/* Constants for READ_MODE */
+#define HSMC_READ_MODE_NCS_CONTROLLED 0
+#define HSMC_READ_MODE_NRD_CONTROLLED 1
+
+/* Constants for WRITE_MODE */
+#define HSMC_WRITE_MODE_NCS_CONTROLLED 0
+#define HSMC_WRITE_MODE_NWE_CONTROLLED 1
+
+/* Constants for EXNW_MODE */
+#define HSMC_EXNW_MODE_DISABLED 0
+#define HSMC_EXNW_MODE_RESERVED 1
+#define HSMC_EXNW_MODE_FROZEN 2
+#define HSMC_EXNW_MODE_READY 3
+
+/* Constants for BAT */
+#define HSMC_BAT_BYTE_SELECT 0
+#define HSMC_BAT_BYTE_WRITE 1
+
+/* Constants for DBW */
+#define HSMC_DBW_8_BITS 0
+#define HSMC_DBW_16_BITS 1
+#define HSMC_DBW_32_BITS 2
+
+/*
+ * Bit manipulation macros. "name" is a field name for which
+ * HSMC_<name>_OFFSET and HSMC_<name>_SIZE are defined above.
+ */
+/* HSMC_BIT: mask with only the lowest bit of field "name" set */
+#define HSMC_BIT(name) \
+ (1 << HSMC_##name##_OFFSET)
+/* HSMC_BF: "value" truncated to the field width and shifted into place */
+#define HSMC_BF(name,value) \
+ (((value) & ((1 << HSMC_##name##_SIZE) - 1)) \
+ << HSMC_##name##_OFFSET)
+/* HSMC_BFEXT: extract field "name" from register value "value" */
+#define HSMC_BFEXT(name,value) \
+ (((value) >> HSMC_##name##_OFFSET) \
+ & ((1 << HSMC_##name##_SIZE) - 1))
+/* HSMC_BFINS: "old" with field "name" replaced by "value" */
+#define HSMC_BFINS(name,value,old) \
+ (((old) & ~(((1 << HSMC_##name##_SIZE) - 1) \
+ << HSMC_##name##_OFFSET)) | HSMC_BF(name,value))
+
+/*
+ * Register access macros. "port" must point to something with a
+ * "regs" member holding the mapped register base; "reg" is a register
+ * name without the HSMC_ prefix and may carry an added byte offset
+ * (e.g. SETUP0 + offset).
+ */
+#define hsmc_readl(port,reg) \
+ readl((port)->regs + HSMC_##reg)
+#define hsmc_writel(port,reg,value) \
+ writel((value), (port)->regs + HSMC_##reg)
+
+#endif /* __ASM_AVR32_HSMC_H__ */
diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
new file mode 100644
index 000000000000..74f8c9f2f03d
--- /dev/null
+++ b/arch/avr32/mach-at32ap/intc.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include "intc.h"
+
+/* One interrupt controller instance: its mapped registers and its irq_chip. */
+struct intc {
+ void __iomem *regs; /* set by init_IRQ() via ioremap() */
+ struct irq_chip chip;
+};
+
+extern struct platform_device at32_intc0_device;
+
+/*
+ * TODO: We may be able to implement mask/unmask by setting IxM flags
+ * in the status register.
+ */
+/* irq_chip .mask callback; currently a no-op. */
+static void intc_mask_irq(unsigned int irq)
+{
+
+}
+
+/* irq_chip .unmask callback; currently a no-op. */
+static void intc_unmask_irq(unsigned int irq)
+{
+
+}
+
+/* The single interrupt controller; .regs is filled in by init_IRQ(). */
+static struct intc intc0 = {
+ .chip = {
+ .name = "intc",
+ .mask = intc_mask_irq,
+ .unmask = intc_unmask_irq,
+ },
+};
+
+/*
+ * All interrupts go via intc at some point.
+ *
+ * @level selects which INTCAUSE register is read to find the IRQ
+ * number (INTCAUSE0 for level 0, the next lower address for each
+ * higher level); the IRQ's flow handler is then called with @regs.
+ */
+asmlinkage void do_IRQ(int level, struct pt_regs *regs)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+ unsigned long status_reg;
+
+ local_irq_disable();
+
+ irq_enter();
+
+ /* the INTCAUSEn registers are laid out in descending address order */
+ irq = intc_readl(&intc0, INTCAUSE0 - 4 * level);
+ desc = irq_desc + irq;
+ desc->handle_irq(irq, desc, regs);
+
+ /*
+ * Clear all interrupt level masks so that we may handle
+ * interrupts during softirq processing. If this is a nested
+ * interrupt, interrupts must stay globally disabled until we
+ * return.
+ */
+ status_reg = sysreg_read(SR);
+ status_reg &= ~(SYSREG_BIT(I0M) | SYSREG_BIT(I1M)
+ | SYSREG_BIT(I2M) | SYSREG_BIT(I3M));
+ sysreg_write(SR, status_reg);
+
+ irq_exit();
+}
+
+/*
+ * Bring up the interrupt controller: enable its "pclk" clock, map its
+ * registers, point every interrupt group at the level 0 entry point
+ * and unmask all four interrupt levels in the status register.
+ * Panics if the resource, the clock or the mapping is unavailable.
+ */
+void __init init_IRQ(void)
+{
+ extern void _evba(void);
+ extern void irq_level0(void);
+ struct resource *regs;
+ struct clk *pclk;
+ unsigned int i;
+ u32 offset, readback;
+
+ regs = platform_get_resource(&at32_intc0_device, IORESOURCE_MEM, 0);
+ if (!regs) {
+ printk(KERN_EMERG "intc: no mmio resource defined\n");
+ goto fail;
+ }
+ pclk = clk_get(&at32_intc0_device.dev, "pclk");
+ if (IS_ERR(pclk)) {
+ printk(KERN_EMERG "intc: no clock defined\n");
+ goto fail;
+ }
+
+ clk_enable(pclk);
+
+ intc0.regs = ioremap(regs->start, regs->end - regs->start + 1);
+ if (!intc0.regs) {
+ printk(KERN_EMERG "intc: failed to map registers (0x%08lx)\n",
+ (unsigned long)regs->start);
+ goto fail;
+ }
+
+ /*
+ * Initialize all interrupts to level 0 (lowest priority). The
+ * priority level may be changed by calling
+ * irq_set_priority().
+ *
+ * The value written to each INTPR register is the distance of
+ * irq_level0 from _evba. An IRQ only gets the intc chip and
+ * the simple flow handler if its register reads back what was
+ * written (presumably unimplemented groups do not -- confirm).
+ */
+ offset = (unsigned long)&irq_level0 - (unsigned long)&_evba;
+ for (i = 0; i < NR_INTERNAL_IRQS; i++) {
+ intc_writel(&intc0, INTPR0 + 4 * i, offset);
+ readback = intc_readl(&intc0, INTPR0 + 4 * i);
+ if (readback == offset)
+ set_irq_chip_and_handler(i, &intc0.chip,
+ handle_simple_irq);
+ }
+
+ /* Unmask all interrupt levels */
+ sysreg_write(SR, (sysreg_read(SR)
+ & ~(SR_I3M | SR_I2M | SR_I1M | SR_I0M)));
+
+ return;
+
+fail:
+ panic("Interrupt controller initialization failed!\n");
+}
+
diff --git a/arch/avr32/mach-at32ap/intc.h b/arch/avr32/mach-at32ap/intc.h
new file mode 100644
index 000000000000..d289ca2fff13
--- /dev/null
+++ b/arch/avr32/mach-at32ap/intc.h
@@ -0,0 +1,327 @@
+/*
+ * Automatically generated by gen-header.xsl
+ */
+#ifndef __ASM_AVR32_PERIHP_INTC_H__
+#define __ASM_AVR32_PERIHP_INTC_H__
+
+#define INTC_NUM_INT_GRPS 33
+
+#define INTC_INTPR0 0x0
+# define INTC_INTPR0_INTLEV_OFFSET 30
+# define INTC_INTPR0_INTLEV_SIZE 2
+# define INTC_INTPR0_OFFSET_OFFSET 0
+# define INTC_INTPR0_OFFSET_SIZE 24
+#define INTC_INTREQ0 0x100
+# define INTC_INTREQ0_IREQUEST0_OFFSET 0
+# define INTC_INTREQ0_IREQUEST0_SIZE 1
+# define INTC_INTREQ0_IREQUEST1_OFFSET 1
+# define INTC_INTREQ0_IREQUEST1_SIZE 1
+#define INTC_INTPR1 0x4
+# define INTC_INTPR1_INTLEV_OFFSET 30
+# define INTC_INTPR1_INTLEV_SIZE 2
+# define INTC_INTPR1_OFFSET_OFFSET 0
+# define INTC_INTPR1_OFFSET_SIZE 24
+#define INTC_INTREQ1 0x104
+# define INTC_INTREQ1_IREQUEST32_OFFSET 0
+# define INTC_INTREQ1_IREQUEST32_SIZE 1
+# define INTC_INTREQ1_IREQUEST33_OFFSET 1
+# define INTC_INTREQ1_IREQUEST33_SIZE 1
+# define INTC_INTREQ1_IREQUEST34_OFFSET 2
+# define INTC_INTREQ1_IREQUEST34_SIZE 1
+# define INTC_INTREQ1_IREQUEST35_OFFSET 3
+# define INTC_INTREQ1_IREQUEST35_SIZE 1
+# define INTC_INTREQ1_IREQUEST36_OFFSET 4
+# define INTC_INTREQ1_IREQUEST36_SIZE 1
+# define INTC_INTREQ1_IREQUEST37_OFFSET 5
+# define INTC_INTREQ1_IREQUEST37_SIZE 1
+#define INTC_INTPR2 0x8
+# define INTC_INTPR2_INTLEV_OFFSET 30
+# define INTC_INTPR2_INTLEV_SIZE 2
+# define INTC_INTPR2_OFFSET_OFFSET 0
+# define INTC_INTPR2_OFFSET_SIZE 24
+#define INTC_INTREQ2 0x108
+# define INTC_INTREQ2_IREQUEST64_OFFSET 0
+# define INTC_INTREQ2_IREQUEST64_SIZE 1
+# define INTC_INTREQ2_IREQUEST65_OFFSET 1
+# define INTC_INTREQ2_IREQUEST65_SIZE 1
+# define INTC_INTREQ2_IREQUEST66_OFFSET 2
+# define INTC_INTREQ2_IREQUEST66_SIZE 1
+# define INTC_INTREQ2_IREQUEST67_OFFSET 3
+# define INTC_INTREQ2_IREQUEST67_SIZE 1
+# define INTC_INTREQ2_IREQUEST68_OFFSET 4
+# define INTC_INTREQ2_IREQUEST68_SIZE 1
+#define INTC_INTPR3 0xc
+# define INTC_INTPR3_INTLEV_OFFSET 30
+# define INTC_INTPR3_INTLEV_SIZE 2
+# define INTC_INTPR3_OFFSET_OFFSET 0
+# define INTC_INTPR3_OFFSET_SIZE 24
+#define INTC_INTREQ3 0x10c
+# define INTC_INTREQ3_IREQUEST96_OFFSET 0
+# define INTC_INTREQ3_IREQUEST96_SIZE 1
+#define INTC_INTPR4 0x10
+# define INTC_INTPR4_INTLEV_OFFSET 30
+# define INTC_INTPR4_INTLEV_SIZE 2
+# define INTC_INTPR4_OFFSET_OFFSET 0
+# define INTC_INTPR4_OFFSET_SIZE 24
+#define INTC_INTREQ4 0x110
+# define INTC_INTREQ4_IREQUEST128_OFFSET 0
+# define INTC_INTREQ4_IREQUEST128_SIZE 1
+#define INTC_INTPR5 0x14
+# define INTC_INTPR5_INTLEV_OFFSET 30
+# define INTC_INTPR5_INTLEV_SIZE 2
+# define INTC_INTPR5_OFFSET_OFFSET 0
+# define INTC_INTPR5_OFFSET_SIZE 24
+#define INTC_INTREQ5 0x114
+# define INTC_INTREQ5_IREQUEST160_OFFSET 0
+# define INTC_INTREQ5_IREQUEST160_SIZE 1
+#define INTC_INTPR6 0x18
+# define INTC_INTPR6_INTLEV_OFFSET 30
+# define INTC_INTPR6_INTLEV_SIZE 2
+# define INTC_INTPR6_OFFSET_OFFSET 0
+# define INTC_INTPR6_OFFSET_SIZE 24
+#define INTC_INTREQ6 0x118
+# define INTC_INTREQ6_IREQUEST192_OFFSET 0
+# define INTC_INTREQ6_IREQUEST192_SIZE 1
+#define INTC_INTPR7 0x1c
+# define INTC_INTPR7_INTLEV_OFFSET 30
+# define INTC_INTPR7_INTLEV_SIZE 2
+# define INTC_INTPR7_OFFSET_OFFSET 0
+# define INTC_INTPR7_OFFSET_SIZE 24
+#define INTC_INTREQ7 0x11c
+# define INTC_INTREQ7_IREQUEST224_OFFSET 0
+# define INTC_INTREQ7_IREQUEST224_SIZE 1
+#define INTC_INTPR8 0x20
+# define INTC_INTPR8_INTLEV_OFFSET 30
+# define INTC_INTPR8_INTLEV_SIZE 2
+# define INTC_INTPR8_OFFSET_OFFSET 0
+# define INTC_INTPR8_OFFSET_SIZE 24
+#define INTC_INTREQ8 0x120
+# define INTC_INTREQ8_IREQUEST256_OFFSET 0
+# define INTC_INTREQ8_IREQUEST256_SIZE 1
+#define INTC_INTPR9 0x24
+# define INTC_INTPR9_INTLEV_OFFSET 30
+# define INTC_INTPR9_INTLEV_SIZE 2
+# define INTC_INTPR9_OFFSET_OFFSET 0
+# define INTC_INTPR9_OFFSET_SIZE 24
+#define INTC_INTREQ9 0x124
+# define INTC_INTREQ9_IREQUEST288_OFFSET 0
+# define INTC_INTREQ9_IREQUEST288_SIZE 1
+#define INTC_INTPR10 0x28
+# define INTC_INTPR10_INTLEV_OFFSET 30
+# define INTC_INTPR10_INTLEV_SIZE 2
+# define INTC_INTPR10_OFFSET_OFFSET 0
+# define INTC_INTPR10_OFFSET_SIZE 24
+#define INTC_INTREQ10 0x128
+# define INTC_INTREQ10_IREQUEST320_OFFSET 0
+# define INTC_INTREQ10_IREQUEST320_SIZE 1
+#define INTC_INTPR11 0x2c
+# define INTC_INTPR11_INTLEV_OFFSET 30
+# define INTC_INTPR11_INTLEV_SIZE 2
+# define INTC_INTPR11_OFFSET_OFFSET 0
+# define INTC_INTPR11_OFFSET_SIZE 24
+#define INTC_INTREQ11 0x12c
+# define INTC_INTREQ11_IREQUEST352_OFFSET 0
+# define INTC_INTREQ11_IREQUEST352_SIZE 1
+#define INTC_INTPR12 0x30
+# define INTC_INTPR12_INTLEV_OFFSET 30
+# define INTC_INTPR12_INTLEV_SIZE 2
+# define INTC_INTPR12_OFFSET_OFFSET 0
+# define INTC_INTPR12_OFFSET_SIZE 24
+#define INTC_INTREQ12 0x130
+# define INTC_INTREQ12_IREQUEST384_OFFSET 0
+# define INTC_INTREQ12_IREQUEST384_SIZE 1
+#define INTC_INTPR13 0x34
+# define INTC_INTPR13_INTLEV_OFFSET 30
+# define INTC_INTPR13_INTLEV_SIZE 2
+# define INTC_INTPR13_OFFSET_OFFSET 0
+# define INTC_INTPR13_OFFSET_SIZE 24
+#define INTC_INTREQ13 0x134
+# define INTC_INTREQ13_IREQUEST416_OFFSET 0
+# define INTC_INTREQ13_IREQUEST416_SIZE 1
+#define INTC_INTPR14 0x38
+# define INTC_INTPR14_INTLEV_OFFSET 30
+# define INTC_INTPR14_INTLEV_SIZE 2
+# define INTC_INTPR14_OFFSET_OFFSET 0
+# define INTC_INTPR14_OFFSET_SIZE 24
+#define INTC_INTREQ14 0x138
+# define INTC_INTREQ14_IREQUEST448_OFFSET 0
+# define INTC_INTREQ14_IREQUEST448_SIZE 1
+#define INTC_INTPR15 0x3c
+# define INTC_INTPR15_INTLEV_OFFSET 30
+# define INTC_INTPR15_INTLEV_SIZE 2
+# define INTC_INTPR15_OFFSET_OFFSET 0
+# define INTC_INTPR15_OFFSET_SIZE 24
+#define INTC_INTREQ15 0x13c
+# define INTC_INTREQ15_IREQUEST480_OFFSET 0
+# define INTC_INTREQ15_IREQUEST480_SIZE 1
+#define INTC_INTPR16 0x40
+# define INTC_INTPR16_INTLEV_OFFSET 30
+# define INTC_INTPR16_INTLEV_SIZE 2
+# define INTC_INTPR16_OFFSET_OFFSET 0
+# define INTC_INTPR16_OFFSET_SIZE 24
+#define INTC_INTREQ16 0x140
+# define INTC_INTREQ16_IREQUEST512_OFFSET 0
+# define INTC_INTREQ16_IREQUEST512_SIZE 1
+#define INTC_INTPR17 0x44
+# define INTC_INTPR17_INTLEV_OFFSET 30
+# define INTC_INTPR17_INTLEV_SIZE 2
+# define INTC_INTPR17_OFFSET_OFFSET 0
+# define INTC_INTPR17_OFFSET_SIZE 24
+#define INTC_INTREQ17 0x144
+# define INTC_INTREQ17_IREQUEST544_OFFSET 0
+# define INTC_INTREQ17_IREQUEST544_SIZE 1
+#define INTC_INTPR18 0x48
+# define INTC_INTPR18_INTLEV_OFFSET 30
+# define INTC_INTPR18_INTLEV_SIZE 2
+# define INTC_INTPR18_OFFSET_OFFSET 0
+# define INTC_INTPR18_OFFSET_SIZE 24
+#define INTC_INTREQ18 0x148
+# define INTC_INTREQ18_IREQUEST576_OFFSET 0
+# define INTC_INTREQ18_IREQUEST576_SIZE 1
+#define INTC_INTPR19 0x4c
+# define INTC_INTPR19_INTLEV_OFFSET 30
+# define INTC_INTPR19_INTLEV_SIZE 2
+# define INTC_INTPR19_OFFSET_OFFSET 0
+# define INTC_INTPR19_OFFSET_SIZE 24
+#define INTC_INTREQ19 0x14c
+# define INTC_INTREQ19_IREQUEST608_OFFSET 0
+# define INTC_INTREQ19_IREQUEST608_SIZE 1
+# define INTC_INTREQ19_IREQUEST609_OFFSET 1
+# define INTC_INTREQ19_IREQUEST609_SIZE 1
+# define INTC_INTREQ19_IREQUEST610_OFFSET 2
+# define INTC_INTREQ19_IREQUEST610_SIZE 1
+# define INTC_INTREQ19_IREQUEST611_OFFSET 3
+# define INTC_INTREQ19_IREQUEST611_SIZE 1
+#define INTC_INTPR20 0x50
+# define INTC_INTPR20_INTLEV_OFFSET 30
+# define INTC_INTPR20_INTLEV_SIZE 2
+# define INTC_INTPR20_OFFSET_OFFSET 0
+# define INTC_INTPR20_OFFSET_SIZE 24
+#define INTC_INTREQ20 0x150
+# define INTC_INTREQ20_IREQUEST640_OFFSET 0
+# define INTC_INTREQ20_IREQUEST640_SIZE 1
+#define INTC_INTPR21 0x54
+# define INTC_INTPR21_INTLEV_OFFSET 30
+# define INTC_INTPR21_INTLEV_SIZE 2
+# define INTC_INTPR21_OFFSET_OFFSET 0
+# define INTC_INTPR21_OFFSET_SIZE 24
+#define INTC_INTREQ21 0x154
+# define INTC_INTREQ21_IREQUEST672_OFFSET 0
+# define INTC_INTREQ21_IREQUEST672_SIZE 1
+#define INTC_INTPR22 0x58
+# define INTC_INTPR22_INTLEV_OFFSET 30
+# define INTC_INTPR22_INTLEV_SIZE 2
+# define INTC_INTPR22_OFFSET_OFFSET 0
+# define INTC_INTPR22_OFFSET_SIZE 24
+#define INTC_INTREQ22 0x158
+# define INTC_INTREQ22_IREQUEST704_OFFSET 0
+# define INTC_INTREQ22_IREQUEST704_SIZE 1
+# define INTC_INTREQ22_IREQUEST705_OFFSET 1
+# define INTC_INTREQ22_IREQUEST705_SIZE 1
+# define INTC_INTREQ22_IREQUEST706_OFFSET 2
+# define INTC_INTREQ22_IREQUEST706_SIZE 1
+#define INTC_INTPR23 0x5c
+# define INTC_INTPR23_INTLEV_OFFSET 30
+# define INTC_INTPR23_INTLEV_SIZE 2
+# define INTC_INTPR23_OFFSET_OFFSET 0
+# define INTC_INTPR23_OFFSET_SIZE 24
+#define INTC_INTREQ23 0x15c
+# define INTC_INTREQ23_IREQUEST736_OFFSET 0
+# define INTC_INTREQ23_IREQUEST736_SIZE 1
+# define INTC_INTREQ23_IREQUEST737_OFFSET 1
+# define INTC_INTREQ23_IREQUEST737_SIZE 1
+# define INTC_INTREQ23_IREQUEST738_OFFSET 2
+# define INTC_INTREQ23_IREQUEST738_SIZE 1
+#define INTC_INTPR24 0x60
+# define INTC_INTPR24_INTLEV_OFFSET 30
+# define INTC_INTPR24_INTLEV_SIZE 2
+# define INTC_INTPR24_OFFSET_OFFSET 0
+# define INTC_INTPR24_OFFSET_SIZE 24
+#define INTC_INTREQ24 0x160
+# define INTC_INTREQ24_IREQUEST768_OFFSET 0
+# define INTC_INTREQ24_IREQUEST768_SIZE 1
+#define INTC_INTPR25 0x64
+# define INTC_INTPR25_INTLEV_OFFSET 30
+# define INTC_INTPR25_INTLEV_SIZE 2
+# define INTC_INTPR25_OFFSET_OFFSET 0
+# define INTC_INTPR25_OFFSET_SIZE 24
+#define INTC_INTREQ25 0x164
+# define INTC_INTREQ25_IREQUEST800_OFFSET 0
+# define INTC_INTREQ25_IREQUEST800_SIZE 1
+#define INTC_INTPR26 0x68
+# define INTC_INTPR26_INTLEV_OFFSET 30
+# define INTC_INTPR26_INTLEV_SIZE 2
+# define INTC_INTPR26_OFFSET_OFFSET 0
+# define INTC_INTPR26_OFFSET_SIZE 24
+#define INTC_INTREQ26 0x168
+# define INTC_INTREQ26_IREQUEST832_OFFSET 0
+# define INTC_INTREQ26_IREQUEST832_SIZE 1
+#define INTC_INTPR27 0x6c
+# define INTC_INTPR27_INTLEV_OFFSET 30
+# define INTC_INTPR27_INTLEV_SIZE 2
+# define INTC_INTPR27_OFFSET_OFFSET 0
+# define INTC_INTPR27_OFFSET_SIZE 24
+#define INTC_INTREQ27 0x16c
+# define INTC_INTREQ27_IREQUEST864_OFFSET 0
+# define INTC_INTREQ27_IREQUEST864_SIZE 1
+#define INTC_INTPR28 0x70
+# define INTC_INTPR28_INTLEV_OFFSET 30
+# define INTC_INTPR28_INTLEV_SIZE 2
+# define INTC_INTPR28_OFFSET_OFFSET 0
+# define INTC_INTPR28_OFFSET_SIZE 24
+#define INTC_INTREQ28 0x170
+# define INTC_INTREQ28_IREQUEST896_OFFSET 0
+# define INTC_INTREQ28_IREQUEST896_SIZE 1
+#define INTC_INTPR29 0x74
+# define INTC_INTPR29_INTLEV_OFFSET 30
+# define INTC_INTPR29_INTLEV_SIZE 2
+# define INTC_INTPR29_OFFSET_OFFSET 0
+# define INTC_INTPR29_OFFSET_SIZE 24
+#define INTC_INTREQ29 0x174
+# define INTC_INTREQ29_IREQUEST928_OFFSET 0
+# define INTC_INTREQ29_IREQUEST928_SIZE 1
+#define INTC_INTPR30 0x78
+# define INTC_INTPR30_INTLEV_OFFSET 30
+# define INTC_INTPR30_INTLEV_SIZE 2
+# define INTC_INTPR30_OFFSET_OFFSET 0
+# define INTC_INTPR30_OFFSET_SIZE 24
+#define INTC_INTREQ30 0x178
+# define INTC_INTREQ30_IREQUEST960_OFFSET 0
+# define INTC_INTREQ30_IREQUEST960_SIZE 1
+#define INTC_INTPR31 0x7c
+# define INTC_INTPR31_INTLEV_OFFSET 30
+# define INTC_INTPR31_INTLEV_SIZE 2
+# define INTC_INTPR31_OFFSET_OFFSET 0
+# define INTC_INTPR31_OFFSET_SIZE 24
+#define INTC_INTREQ31 0x17c
+# define INTC_INTREQ31_IREQUEST992_OFFSET 0
+# define INTC_INTREQ31_IREQUEST992_SIZE 1
+#define INTC_INTPR32 0x80
+# define INTC_INTPR32_INTLEV_OFFSET 30
+# define INTC_INTPR32_INTLEV_SIZE 2
+# define INTC_INTPR32_OFFSET_OFFSET 0
+# define INTC_INTPR32_OFFSET_SIZE 24
+#define INTC_INTREQ32 0x180
+# define INTC_INTREQ32_IREQUEST1024_OFFSET 0
+# define INTC_INTREQ32_IREQUEST1024_SIZE 1
+/*
+ * Interrupt cause registers, one per interrupt level. Note the
+ * descending layout: INTCAUSE0 is at the highest address and each
+ * following level is 4 bytes lower, which is what do_IRQ() relies on
+ * when it reads INTCAUSE0 - 4 * level.
+ */
+#define INTC_INTCAUSE0 0x20c
+# define INTC_INTCAUSE0_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE0_CAUSEGRP_SIZE 6
+#define INTC_INTCAUSE1 0x208
+# define INTC_INTCAUSE1_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE1_CAUSEGRP_SIZE 6
+#define INTC_INTCAUSE2 0x204
+# define INTC_INTCAUSE2_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE2_CAUSEGRP_SIZE 6
+#define INTC_INTCAUSE3 0x200
+# define INTC_INTCAUSE3_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE3_CAUSEGRP_SIZE 6
+
+/*
+ * Field helpers. "name" is <REGISTER>_<FIELD> as used in the
+ * INTC_..._OFFSET / INTC_..._SIZE definitions above.
+ * INTC_BIT:   mask with the lowest bit of the field set
+ * INTC_MKBF:  "value" truncated to the field width, shifted into place
+ * INTC_GETBF: field extracted from register value "value"
+ */
+#define INTC_BIT(name) (1 << INTC_##name##_OFFSET)
+#define INTC_MKBF(name, value) (((value) & ((1 << INTC_##name##_SIZE) - 1)) << INTC_##name##_OFFSET)
+#define INTC_GETBF(name, value) (((value) >> INTC_##name##_OFFSET) & ((1 << INTC_##name##_SIZE) - 1))
+
+/*
+ * Register accessors. "port" must have a "regs" member holding the
+ * mapped base; "reg" is a register name without the INTC_ prefix and
+ * may carry an added byte offset (e.g. INTPR0 + 4 * i).
+ */
+#define intc_readl(port,reg) readl((port)->regs + INTC_##reg)
+#define intc_writel(port,reg,value) writel((value), (port)->regs + INTC_##reg)
+
+#endif /* __ASM_AVR32_PERIHP_INTC_H__ */
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
new file mode 100644
index 000000000000..d3aabfca8598
--- /dev/null
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -0,0 +1,118 @@
+/*
+ * Atmel PIO2 Port Multiplexer support
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/portmux.h>
+
+#include "pio.h"
+
+/* Number of slots in pio_dev[]; port ids must be below this value */
+#define MAX_NR_PIO_DEVICES 8
+
+/* Per-port state, filled in by at32_init_pio() */
+struct pio_device {
+ /* ioremap()ed register base; pio_probe() BUG()s if this is NULL */
+ void __iomem *regs;
+ /* platform device this port was initialized from */
+ const struct platform_device *pdev;
+ /* result of clk_get(..., "mck"); may hold an error pointer on failure */
+ struct clk *clk;
+ /* not referenced by the code in this file */
+ u32 alloc_mask;
+ /* "pio<id>", used in log messages */
+ char name[32];
+};
+
+/* One entry per port, indexed by platform device id / portmux_id */
+static struct pio_device pio_dev[MAX_NR_PIO_DEVICES];
+
+/*
+ * Route one pin of a PIO port to a peripheral function.
+ *
+ * portmux_id:  index into pio_dev[]; BUG()s when out of range
+ * pin_id:      bit number of the pin within the port
+ * function_id: zero selects ASR, any other value selects BSR
+ */
+void portmux_set_func(unsigned int portmux_id, unsigned int pin_id,
+		      unsigned int function_id)
+{
+	struct pio_device *port;
+	u32 pin_mask;
+
+	BUG_ON(portmux_id >= MAX_NR_PIO_DEVICES);
+
+	port = pio_dev + portmux_id;
+	pin_mask = 1 << pin_id;
+
+	if (!function_id)
+		pio_writel(port, ASR, pin_mask);
+	else
+		pio_writel(port, BSR, pin_mask);
+
+	/*
+	 * NOTE(review): PDR presumably takes the pin out of PIO control
+	 * and hands it to the selected peripheral - confirm in datasheet.
+	 */
+	pio_writel(port, PDR, pin_mask);
+}
+
+/*
+ * Platform driver probe: attach the pio_dev[] entry for this device as
+ * driver data and announce the port. BUG()s if the id is out of range
+ * or the port has no register mapping (regs is set by at32_init_pio()).
+ * Always returns 0.
+ */
+static int __init pio_probe(struct platform_device *pdev)
+{
+	struct pio_device *port;
+
+	BUG_ON(pdev->id >= MAX_NR_PIO_DEVICES);
+	port = pio_dev + pdev->id;
+	BUG_ON(!port->regs);
+
+	/* TODO: Interrupts */
+
+	platform_set_drvdata(pdev, port);
+
+	printk(KERN_INFO "%s: Atmel Port Multiplexer at 0x%p (irq %d)\n",
+	       port->name, port->regs, platform_get_irq(pdev, 0));
+
+	return 0;
+}
+
+/* Platform driver matched against devices named "pio" */
+static struct platform_driver pio_driver = {
+	.driver	= {
+		.name	= "pio",
+	},
+	.probe	= pio_probe,
+};
+
+/* Register pio_driver; returns platform_driver_register()'s result. */
+static int __init pio_init(void)
+{
+	int ret = platform_driver_register(&pio_driver);
+
+	return ret;
+}
+subsys_initcall(pio_init);
+
+/*
+ * Early setup of one PIO port: name the port "pio<id>", get and enable
+ * its "mck" clock, map its registers and write all ones to ODR and PER
+ * (NOTE(review): presumably "all outputs disabled, all pins under PIO
+ * control" - confirm against the datasheet).
+ *
+ * pdev: platform device describing the port. pdev->id selects the slot
+ *       in pio_dev[] and must be below MAX_NR_PIO_DEVICES.
+ *
+ * Failures are reported with dev_err(). Nothing is returned; if the
+ * function bails out early the slot keeps a NULL regs pointer, which
+ * pio_probe() turns into a BUG().
+ */
+void __init at32_init_pio(struct platform_device *pdev)
+{
+	struct resource *regs;
+	struct pio_device *pio;
+
+	/*
+	 * pio_dev[] holds MAX_NR_PIO_DEVICES entries, so the highest
+	 * valid id is MAX_NR_PIO_DEVICES - 1.
+	 */
+	if (pdev->id >= MAX_NR_PIO_DEVICES) {
+		dev_err(&pdev->dev, "only %d PIO devices supported\n",
+			MAX_NR_PIO_DEVICES);
+		return;
+	}
+
+	pio = &pio_dev[pdev->id];
+	snprintf(pio->name, sizeof(pio->name), "pio%d", pdev->id);
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_err(&pdev->dev, "no mmio resource defined\n");
+		return;
+	}
+
+	pio->clk = clk_get(&pdev->dev, "mck");
+	if (IS_ERR(pio->clk))
+		/*
+		 * This is a fatal error, but if we continue we might
+		 * be so lucky that we manage to initialize the
+		 * console and display this message...
+		 */
+		dev_err(&pdev->dev, "no mck clock defined\n");
+	else
+		clk_enable(pio->clk);
+
+	pio->pdev = pdev;
+	pio->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!pio->regs) {
+		/* Don't touch the registers through a failed mapping */
+		dev_err(&pdev->dev, "failed to map registers\n");
+		return;
+	}
+
+	pio_writel(pio, ODR, ~0UL);
+	pio_writel(pio, PER, ~0UL);
+}
diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h
new file mode 100644
index 000000000000..cfea12351599
--- /dev/null
+++ b/arch/avr32/mach-at32ap/pio.h
@@ -0,0 +1,178 @@
+/*
+ * Atmel PIO2 Port Multiplexer support
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ARCH_AVR32_AT32AP_PIO_H__
+#define __ARCH_AVR32_AT32AP_PIO_H__
+
+/*
+ * PIO register offsets, relative to a port's register base.
+ * pio_readl()/pio_writel() take the register name without the PIO_
+ * prefix (e.g. pio_writel(port, PER, value)).
+ */
+#define PIO_PER 0x0000
+#define PIO_PDR 0x0004
+#define PIO_PSR 0x0008
+#define PIO_OER 0x0010
+#define PIO_ODR 0x0014
+#define PIO_OSR 0x0018
+#define PIO_IFER 0x0020
+#define PIO_IFDR 0x0024
+#define PIO_ISFR 0x0028
+#define PIO_SODR 0x0030
+#define PIO_CODR 0x0034
+#define PIO_ODSR 0x0038
+#define PIO_PDSR 0x003c
+#define PIO_IER 0x0040
+#define PIO_IDR 0x0044
+#define PIO_IMR 0x0048
+#define PIO_ISR 0x004c
+#define PIO_MDER 0x0050
+#define PIO_MDDR 0x0054
+#define PIO_MDSR 0x0058
+#define PIO_PUDR 0x0060
+#define PIO_PUER 0x0064
+#define PIO_PUSR 0x0068
+#define PIO_ASR 0x0070
+#define PIO_BSR 0x0074
+#define PIO_ABSR 0x0078
+#define PIO_OWER 0x00a0
+#define PIO_OWDR 0x00a4
+#define PIO_OWSR 0x00a8
+
+/* Bitfields in PER */
+
+/* Bitfields in PDR */
+
+/* Bitfields in PSR */
+
+/* Bitfields in OER */
+
+/* Bitfields in ODR */
+
+/* Bitfields in OSR */
+
+/* Bitfields in IFER */
+
+/* Bitfields in IFDR */
+
+/* Bitfields in ISFR */
+
+/* Bitfields in SODR */
+
+/* Bitfields in CODR */
+
+/* Bitfields in ODSR */
+
+/* Bitfields in PDSR */
+
+/* Bitfields in IER */
+
+/* Bitfields in IDR */
+
+/* Bitfields in IMR */
+
+/* Bitfields in ISR */
+
+/* Bitfields in MDER */
+
+/* Bitfields in MDDR */
+
+/* Bitfields in MDSR */
+
+/* Bitfields in PUDR */
+
+/* Bitfields in PUER */
+
+/* Bitfields in PUSR */
+
+/* Bitfields in ASR */
+
+/* Bitfields in BSR */
+
+/* Bitfields in ABSR */
+/*
+ * One single-bit field per pin: P<n> sits at bit n, for n = 0..31.
+ * The names carry no register name; they are the only bitfield
+ * definitions in this header.
+ */
+#define PIO_P0_OFFSET 0
+#define PIO_P0_SIZE 1
+#define PIO_P1_OFFSET 1
+#define PIO_P1_SIZE 1
+#define PIO_P2_OFFSET 2
+#define PIO_P2_SIZE 1
+#define PIO_P3_OFFSET 3
+#define PIO_P3_SIZE 1
+#define PIO_P4_OFFSET 4
+#define PIO_P4_SIZE 1
+#define PIO_P5_OFFSET 5
+#define PIO_P5_SIZE 1
+#define PIO_P6_OFFSET 6
+#define PIO_P6_SIZE 1
+#define PIO_P7_OFFSET 7
+#define PIO_P7_SIZE 1
+#define PIO_P8_OFFSET 8
+#define PIO_P8_SIZE 1
+#define PIO_P9_OFFSET 9
+#define PIO_P9_SIZE 1
+#define PIO_P10_OFFSET 10
+#define PIO_P10_SIZE 1
+#define PIO_P11_OFFSET 11
+#define PIO_P11_SIZE 1
+#define PIO_P12_OFFSET 12
+#define PIO_P12_SIZE 1
+#define PIO_P13_OFFSET 13
+#define PIO_P13_SIZE 1
+#define PIO_P14_OFFSET 14
+#define PIO_P14_SIZE 1
+#define PIO_P15_OFFSET 15
+#define PIO_P15_SIZE 1
+#define PIO_P16_OFFSET 16
+#define PIO_P16_SIZE 1
+#define PIO_P17_OFFSET 17
+#define PIO_P17_SIZE 1
+#define PIO_P18_OFFSET 18
+#define PIO_P18_SIZE 1
+#define PIO_P19_OFFSET 19
+#define PIO_P19_SIZE 1
+#define PIO_P20_OFFSET 20
+#define PIO_P20_SIZE 1
+#define PIO_P21_OFFSET 21
+#define PIO_P21_SIZE 1
+#define PIO_P22_OFFSET 22
+#define PIO_P22_SIZE 1
+#define PIO_P23_OFFSET 23
+#define PIO_P23_SIZE 1
+#define PIO_P24_OFFSET 24
+#define PIO_P24_SIZE 1
+#define PIO_P25_OFFSET 25
+#define PIO_P25_SIZE 1
+#define PIO_P26_OFFSET 26
+#define PIO_P26_SIZE 1
+#define PIO_P27_OFFSET 27
+#define PIO_P27_SIZE 1
+#define PIO_P28_OFFSET 28
+#define PIO_P28_SIZE 1
+#define PIO_P29_OFFSET 29
+#define PIO_P29_SIZE 1
+#define PIO_P30_OFFSET 30
+#define PIO_P30_SIZE 1
+#define PIO_P31_OFFSET 31
+#define PIO_P31_SIZE 1
+
+/* Bitfields in OWER */
+
+/* Bitfields in OWDR */
+
+/* Bitfields in OWSR */
+
+/*
+ * Bit manipulation macros. "name" is a bitfield name without the PIO_
+ * prefix (e.g. P5); PIO_<name>_OFFSET and PIO_<name>_SIZE give the
+ * field's position and width.
+ */
+/* Mask with only the lowest bit of the field set */
+#define PIO_BIT(name) (1 << PIO_##name##_OFFSET)
+/* Truncate value to the field width and shift it into position */
+#define PIO_BF(name,value) (((value) & ((1 << PIO_##name##_SIZE) - 1)) << PIO_##name##_OFFSET)
+/* Extract the field from a register value, right-aligned */
+#define PIO_BFEXT(name,value) (((value) >> PIO_##name##_OFFSET) & ((1 << PIO_##name##_SIZE) - 1))
+/* Return old with the field replaced by value */
+#define PIO_BFINS(name,value,old) (((old) & ~(((1 << PIO_##name##_SIZE) - 1) << PIO_##name##_OFFSET)) | PIO_BF(name,value))
+
+/*
+ * Register access macros: readl()/writel() on register PIO_<reg>,
+ * addressed relative to (port)->regs.
+ */
+#define pio_readl(port,reg) readl((port)->regs + PIO_##reg)
+#define pio_writel(port,reg,value) writel((value), (port)->regs + PIO_##reg)
+
+/* Early per-port setup; defined in pio.c */
+void at32_init_pio(struct platform_device *pdev);
+
+#endif /* __ARCH_AVR32_AT32AP_PIO_H__ */
diff --git a/arch/avr32/mach-at32ap/sm.c b/arch/avr32/mach-at32ap/sm.c
new file mode 100644
index 000000000000..03306eb0345e
--- /dev/null
+++ b/arch/avr32/mach-at32ap/sm.c
@@ -0,0 +1,289 @@
+/*
+ * System Manager driver for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/random.h>
+#include <linux/spinlock.h>
+
+#include <asm/intc.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include <asm/arch/sm.h>
+
+#include "sm.h"
+
+/*
+ * Indices into the SM platform device's resource[] array. eim_init()
+ * reads the EIM entry's .start as the interrupt group number; the PM
+ * and RTC indices are not used by the code in this file.
+ */
+#define SM_EIM_IRQ_RESOURCE 1
+#define SM_PM_IRQ_RESOURCE 2
+#define SM_RTC_IRQ_RESOURCE 3
+
+/* Get the struct at32_sm that embeds the given irq_controller as .irqc */
+#define to_eim(irqc) container_of(irqc, struct at32_sm, irqc)
+
+/* The single System Manager instance, set up by at32_sm_init() */
+struct at32_sm system_manager;
+
+/*
+ * Map the System Manager registers and initialize its lock.
+ *
+ * Returns 0 on success, -ENXIO if at32_sm_device has no memory
+ * resource, or -ENOMEM if the registers cannot be mapped. Failures are
+ * also logged.
+ */
+int __init at32_sm_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	struct resource *mem;
+	int err;
+
+	mem = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0);
+	if (mem == NULL) {
+		err = -ENXIO;
+		goto fail;
+	}
+
+	spin_lock_init(&sm->lock);
+	sm->pdev = &at32_sm_device;
+
+	sm->regs = ioremap(mem->start, mem->end - mem->start + 1);
+	if (sm->regs == NULL) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	printk(KERN_ERR "Failed to initialize System Manager: %d\n", err);
+	return err;
+}
+
+/*
+ * External Interrupt Module (EIM).
+ *
+ * EIM gets level- or edge-triggered interrupts of either polarity
+ * from the outside and converts them to active-high level-triggered
+ * interrupts that the internal interrupt controller can handle. EIM
+ * also provides masking/unmasking of interrupts, as well as
+ * acknowledging of edge-triggered interrupts.
+ */
+
+/*
+ * Handler installed for EIM lines that have no real action registered:
+ * logs a warning, disables the interrupt and reports IRQ_NONE.
+ */
+static irqreturn_t spurious_eim_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ printk(KERN_WARNING "Spurious EIM interrupt %d\n", irq);
+ disable_irq(irq);
+ return IRQ_NONE;
+}
+
+/*
+ * Placeholder action; eim_init() and eim_free() point unused slots of
+ * sm->action[] at it so eim_handle_irq() never sees a NULL action.
+ */
+static struct irqaction eim_spurious_action = {
+ .handler = spurious_eim_interrupt,
+};
+
+/*
+ * Demultiplex the EIM interrupt group: for every pending line, highest
+ * bit first, acknowledge it in EIM_ICR and call the registered action's
+ * handler with the line's IRQ number (first_irq + bit index).
+ *
+ * dev_id is the struct irq_controller embedded in the struct at32_sm.
+ * Always returns IRQ_HANDLED, including when nothing was pending.
+ */
+static irqreturn_t eim_handle_irq(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct irq_controller * irqc = dev_id;
+ struct at32_sm *sm = to_eim(irqc);
+ unsigned long pending;
+
+ /*
+ * No need to disable interrupts globally. The interrupt
+ * level relevant to this group must be masked all the time,
+ * so we know that this particular EIM instance will not be
+ * re-entered.
+ */
+ spin_lock(&sm->lock);
+
+ pending = intc_get_pending(sm->irqc.irq_group);
+ if (unlikely(!pending)) {
+ printk(KERN_ERR "EIM (group %u): No interrupts pending!\n",
+ sm->irqc.irq_group);
+ goto unlock;
+ }
+
+ do {
+ struct irqaction *action;
+ unsigned int i;
+
+ /* Pick the highest pending line and remove it from the set */
+ i = fls(pending) - 1;
+ pending &= ~(1 << i);
+ action = sm->action[i];
+
+ /* Acknowledge the interrupt */
+ sm_writel(sm, EIM_ICR, 1 << i);
+
+ /* The handler runs without sm->lock held */
+ spin_unlock(&sm->lock);
+
+ /*
+ * NOTE(review): interrupts are disabled only for SA_INTERRUPT
+ * actions but re-enabled unconditionally after the handler -
+ * confirm this matches the state expected on entry.
+ */
+ if (action->flags & SA_INTERRUPT)
+ local_irq_disable();
+ action->handler(sm->irqc.first_irq + i, action->dev_id, regs);
+ local_irq_enable();
+ spin_lock(&sm->lock);
+ if (action->flags & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(sm->irqc.first_irq + i);
+ } while (pending);
+
+unlock:
+ spin_unlock(&sm->lock);
+ return IRQ_HANDLED;
+}
+
+/* Mask one EIM line by writing its bit to EIM_IDR. */
+static void eim_mask(struct irq_controller *irqc, unsigned int irq)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned int line = irq - sm->irqc.first_irq;
+
+	sm_writel(sm, EIM_IDR, 1 << line);
+}
+
+/* Unmask one EIM line by writing its bit to EIM_IER. */
+static void eim_unmask(struct irq_controller *irqc, unsigned int irq)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned int line = irq - sm->irqc.first_irq;
+
+	sm_writel(sm, EIM_IER, 1 << line);
+}
+
+/*
+ * Install action for an EIM line, clear anything latched earlier for
+ * that line and unmask it. Always returns 0.
+ */
+static int eim_setup(struct irq_controller *irqc, unsigned int irq,
+		     struct irqaction *action)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned int line = irq - sm->irqc.first_irq;
+
+	sm->action[line] = action;
+
+	/* Acknowledge earlier interrupts */
+	sm_writel(sm, EIM_ICR, (1 << line));
+
+	eim_unmask(irqc, irq);
+
+	return 0;
+}
+
+/* Mask an EIM line and point its slot back at the spurious action. */
+static void eim_free(struct irq_controller *irqc, unsigned int irq,
+		     void *dev)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned int line = irq - sm->irqc.first_irq;
+
+	eim_mask(irqc, irq);
+	sm->action[line] = &eim_spurious_action;
+}
+
+/*
+ * Program the trigger type of one EIM line. Under sm->lock, the line's
+ * bit is set or cleared in EIM_MODE, EIM_EDGE and EIM_LEVEL according
+ * to IRQ_TYPE_LEVEL, IRQ_EDGE_RISING and IRQ_LEVEL_HIGH in type.
+ * Always returns 0.
+ */
+static int eim_set_type(struct irq_controller *irqc, unsigned int irq,
+			unsigned int type)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned long flags;
+	u32 bit, reg;
+
+	spin_lock_irqsave(&sm->lock, flags);
+
+	bit = 1 << (irq - sm->irqc.first_irq);
+
+	reg = sm_readl(sm, EIM_MODE);
+	reg = (type & IRQ_TYPE_LEVEL) ? (reg | bit) : (reg & ~bit);
+	sm_writel(sm, EIM_MODE, reg);
+
+	reg = sm_readl(sm, EIM_EDGE);
+	reg = (type & IRQ_EDGE_RISING) ? (reg | bit) : (reg & ~bit);
+	sm_writel(sm, EIM_EDGE, reg);
+
+	reg = sm_readl(sm, EIM_LEVEL);
+	reg = (type & IRQ_LEVEL_HIGH) ? (reg | bit) : (reg & ~bit);
+	sm_writel(sm, EIM_LEVEL, reg);
+
+	spin_unlock_irqrestore(&sm->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Read back the trigger type of one EIM line: snapshot EIM_MODE,
+ * EIM_EDGE and EIM_LEVEL under sm->lock and translate the line's bit
+ * in each into IRQ_TYPE_LEVEL, IRQ_EDGE_RISING and IRQ_LEVEL_HIGH.
+ */
+static unsigned int eim_get_type(struct irq_controller *irqc,
+				 unsigned int irq)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	u32 bit = 1 << (irq - sm->irqc.first_irq);
+	u32 mode, edge, level;
+	unsigned long flags;
+	unsigned int type;
+
+	spin_lock_irqsave(&sm->lock, flags);
+	mode = sm_readl(sm, EIM_MODE);
+	edge = sm_readl(sm, EIM_EDGE);
+	level = sm_readl(sm, EIM_LEVEL);
+	spin_unlock_irqrestore(&sm->lock, flags);
+
+	type = (mode & bit) ? IRQ_TYPE_LEVEL : 0;
+	type |= (edge & bit) ? IRQ_EDGE_RISING : 0;
+	type |= (level & bit) ? IRQ_LEVEL_HIGH : 0;
+
+	return type;
+}
+
+/*
+ * Operations of the EIM interrupt controller; eim_init() stores this in
+ * sm->irqc.class before calling intc_register_controller().
+ */
+static struct irq_controller_class eim_irq_class = {
+ .typename = "EIM",
+ .handle = eim_handle_irq,
+ .setup = eim_setup,
+ .free = eim_free,
+ .mask = eim_mask,
+ .unmask = eim_unmask,
+ .set_type = eim_set_type,
+ .get_type = eim_get_type,
+};
+
+/*
+ * Set up the External Interrupt Module: count the implemented lines,
+ * give every line the spurious action and register the controller with
+ * the interrupt controller core.
+ *
+ * Returns 0 on success, -ENOMEM if the action table cannot be
+ * allocated, or the negative result of intc_register_controller().
+ */
+static int __init eim_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned int line;
+	u32 implemented;
+	int err;
+
+	/*
+	 * The EIM is really the same module as SM, so register
+	 * mapping, etc. has been taken care of already.
+	 */
+
+	/*
+	 * Mask every line, then write all ones to EIM_MODE and read it
+	 * back: the highest bit that sticks gives the number of
+	 * interrupt lines actually implemented in hardware.
+	 */
+	sm_writel(sm, EIM_IDR, ~0UL);
+	sm_writel(sm, EIM_MODE, ~0UL);
+	implemented = sm_readl(sm, EIM_MODE);
+	sm->irqc.nr_irqs = fls(implemented);
+
+	sm->action = kmalloc(sizeof(*sm->action) * sm->irqc.nr_irqs,
+			     GFP_KERNEL);
+	if (!sm->action)
+		return -ENOMEM;
+
+	for (line = 0; line < sm->irqc.nr_irqs; line++)
+		sm->action[line] = &eim_spurious_action;
+
+	spin_lock_init(&sm->lock);
+	sm->irqc.irq_group = sm->pdev->resource[SM_EIM_IRQ_RESOURCE].start;
+	sm->irqc.class = &eim_irq_class;
+
+	err = intc_register_controller(&sm->irqc);
+	if (err < 0) {
+		kfree(sm->action);
+		return err;
+	}
+
+	printk("EIM: External Interrupt Module at 0x%p, IRQ group %u\n",
+	       sm->regs, sm->irqc.irq_group);
+	printk("EIM: Handling %u external IRQs, starting with IRQ%u\n",
+	       sm->irqc.nr_irqs, sm->irqc.first_irq);
+
+	return 0;
+}
+arch_initcall(eim_init);
diff --git a/arch/avr32/mach-at32ap/sm.h b/arch/avr32/mach-at32ap/sm.h
new file mode 100644
index 000000000000..27565822ae2a
--- /dev/null
+++ b/arch/avr32/mach-at32ap/sm.h
@@ -0,0 +1,240 @@
+/*
+ * Register definitions for SM
+ *
+ * System Manager
+ */
+#ifndef __ASM_AVR32_SM_H__
+#define __ASM_AVR32_SM_H__
+
+/*
+ * SM register offsets, relative to the System Manager register base.
+ * sm_readl()/sm_writel() take the register name without the SM_ prefix
+ * (e.g. sm_writel(sm, EIM_IDR, value)). The second name component
+ * (PM, RTC, WDT, RC, EIM) groups the registers by sub-module.
+ */
+#define SM_PM_MCCTRL 0x0000
+#define SM_PM_CKSEL 0x0004
+#define SM_PM_CPU_MASK 0x0008
+#define SM_PM_HSB_MASK 0x000c
+#define SM_PM_PBA_MASK 0x0010
+#define SM_PM_PBB_MASK 0x0014
+#define SM_PM_PLL0 0x0020
+#define SM_PM_PLL1 0x0024
+#define SM_PM_VCTRL 0x0030
+#define SM_PM_VMREF 0x0034
+#define SM_PM_VMV 0x0038
+#define SM_PM_IER 0x0040
+#define SM_PM_IDR 0x0044
+#define SM_PM_IMR 0x0048
+#define SM_PM_ISR 0x004c
+#define SM_PM_ICR 0x0050
+#define SM_PM_GCCTRL 0x0060
+#define SM_RTC_CTRL 0x0080
+#define SM_RTC_VAL 0x0084
+#define SM_RTC_TOP 0x0088
+#define SM_RTC_IER 0x0090
+#define SM_RTC_IDR 0x0094
+#define SM_RTC_IMR 0x0098
+#define SM_RTC_ISR 0x009c
+#define SM_RTC_ICR 0x00a0
+#define SM_WDT_CTRL 0x00b0
+#define SM_WDT_CLR 0x00b4
+#define SM_WDT_EXT 0x00b8
+#define SM_RC_RCAUSE 0x00c0
+#define SM_EIM_IER 0x0100
+#define SM_EIM_IDR 0x0104
+#define SM_EIM_IMR 0x0108
+#define SM_EIM_ISR 0x010c
+#define SM_EIM_ICR 0x0110
+#define SM_EIM_MODE 0x0114
+#define SM_EIM_EDGE 0x0118
+#define SM_EIM_LEVEL 0x011c
+#define SM_EIM_TEST 0x0120
+#define SM_EIM_NMIC 0x0124
+
+/*
+ * Bitfield definitions. For each field, SM_<field>_OFFSET is the bit
+ * position of its least significant bit and SM_<field>_SIZE its width
+ * in bits; both are consumed by SM_BIT/SM_BF/SM_BFEXT/SM_BFINS. Most
+ * field names do not include the register name. Register headings
+ * with nothing listed under them have no definitions of their own in
+ * this header.
+ */
+
+/* Bitfields in PM_MCCTRL */
+
+/* Bitfields in PM_CKSEL */
+#define SM_CPUSEL_OFFSET 0
+#define SM_CPUSEL_SIZE 3
+#define SM_CPUDIV_OFFSET 7
+#define SM_CPUDIV_SIZE 1
+#define SM_HSBSEL_OFFSET 8
+#define SM_HSBSEL_SIZE 3
+#define SM_HSBDIV_OFFSET 15
+#define SM_HSBDIV_SIZE 1
+#define SM_PBASEL_OFFSET 16
+#define SM_PBASEL_SIZE 3
+#define SM_PBADIV_OFFSET 23
+#define SM_PBADIV_SIZE 1
+#define SM_PBBSEL_OFFSET 24
+#define SM_PBBSEL_SIZE 3
+#define SM_PBBDIV_OFFSET 31
+#define SM_PBBDIV_SIZE 1
+
+/* Bitfields in PM_CPU_MASK */
+
+/* Bitfields in PM_HSB_MASK */
+
+/* Bitfields in PM_PBA_MASK */
+
+/* Bitfields in PM_PBB_MASK */
+
+/* Bitfields in PM_PLL0 */
+#define SM_PLLEN_OFFSET 0
+#define SM_PLLEN_SIZE 1
+#define SM_PLLOSC_OFFSET 1
+#define SM_PLLOSC_SIZE 1
+#define SM_PLLOPT_OFFSET 2
+#define SM_PLLOPT_SIZE 3
+#define SM_PLLDIV_OFFSET 8
+#define SM_PLLDIV_SIZE 8
+#define SM_PLLMUL_OFFSET 16
+#define SM_PLLMUL_SIZE 8
+#define SM_PLLCOUNT_OFFSET 24
+#define SM_PLLCOUNT_SIZE 6
+#define SM_PLLTEST_OFFSET 31
+#define SM_PLLTEST_SIZE 1
+
+/* Bitfields in PM_PLL1 */
+
+/* Bitfields in PM_VCTRL */
+#define SM_VAUTO_OFFSET 0
+#define SM_VAUTO_SIZE 1
+#define SM_PM_VCTRL_VAL_OFFSET 8
+#define SM_PM_VCTRL_VAL_SIZE 7
+
+/* Bitfields in PM_VMREF */
+#define SM_REFSEL_OFFSET 0
+#define SM_REFSEL_SIZE 4
+
+/* Bitfields in PM_VMV */
+#define SM_PM_VMV_VAL_OFFSET 0
+#define SM_PM_VMV_VAL_SIZE 8
+
+/* Bitfields in PM_IER */
+
+/* Bitfields in PM_IDR */
+
+/* Bitfields in PM_IMR */
+
+/* Bitfields in PM_ISR */
+
+/* Bitfields in PM_ICR */
+#define SM_LOCK0_OFFSET 0
+#define SM_LOCK0_SIZE 1
+#define SM_LOCK1_OFFSET 1
+#define SM_LOCK1_SIZE 1
+#define SM_WAKE_OFFSET 2
+#define SM_WAKE_SIZE 1
+#define SM_VOK_OFFSET 3
+#define SM_VOK_SIZE 1
+#define SM_VMRDY_OFFSET 4
+#define SM_VMRDY_SIZE 1
+#define SM_CKRDY_OFFSET 5
+#define SM_CKRDY_SIZE 1
+
+/* Bitfields in PM_GCCTRL */
+#define SM_OSCSEL_OFFSET 0
+#define SM_OSCSEL_SIZE 1
+#define SM_PLLSEL_OFFSET 1
+#define SM_PLLSEL_SIZE 1
+#define SM_CEN_OFFSET 2
+#define SM_CEN_SIZE 1
+#define SM_CPC_OFFSET 3
+#define SM_CPC_SIZE 1
+#define SM_DIVEN_OFFSET 4
+#define SM_DIVEN_SIZE 1
+#define SM_DIV_OFFSET 8
+#define SM_DIV_SIZE 8
+
+/* Bitfields in RTC_CTRL */
+#define SM_PCLR_OFFSET 1
+#define SM_PCLR_SIZE 1
+#define SM_TOPEN_OFFSET 2
+#define SM_TOPEN_SIZE 1
+#define SM_CLKEN_OFFSET 3
+#define SM_CLKEN_SIZE 1
+#define SM_PSEL_OFFSET 8
+#define SM_PSEL_SIZE 16
+
+/* Bitfields in RTC_VAL */
+#define SM_RTC_VAL_VAL_OFFSET 0
+#define SM_RTC_VAL_VAL_SIZE 31
+
+/* Bitfields in RTC_TOP */
+#define SM_RTC_TOP_VAL_OFFSET 0
+#define SM_RTC_TOP_VAL_SIZE 32
+
+/* Bitfields in RTC_IER */
+
+/* Bitfields in RTC_IDR */
+
+/* Bitfields in RTC_IMR */
+
+/* Bitfields in RTC_ISR */
+
+/* Bitfields in RTC_ICR */
+#define SM_TOPI_OFFSET 0
+#define SM_TOPI_SIZE 1
+
+/* Bitfields in WDT_CTRL */
+#define SM_KEY_OFFSET 24
+#define SM_KEY_SIZE 8
+
+/* Bitfields in WDT_CLR */
+
+/* Bitfields in WDT_EXT */
+
+/* Bitfields in RC_RCAUSE */
+#define SM_POR_OFFSET 0
+#define SM_POR_SIZE 1
+#define SM_BOD_OFFSET 1
+#define SM_BOD_SIZE 1
+#define SM_EXT_OFFSET 2
+#define SM_EXT_SIZE 1
+#define SM_WDT_OFFSET 3
+#define SM_WDT_SIZE 1
+#define SM_NTAE_OFFSET 4
+#define SM_NTAE_SIZE 1
+#define SM_SERP_OFFSET 5
+#define SM_SERP_SIZE 1
+
+/* Bitfields in EIM_IER */
+
+/* Bitfields in EIM_IDR */
+
+/* Bitfields in EIM_IMR */
+
+/* Bitfields in EIM_ISR */
+
+/* Bitfields in EIM_ICR */
+
+/* Bitfields in EIM_MODE */
+
+/* Bitfields in EIM_EDGE */
+#define SM_INT0_OFFSET 0
+#define SM_INT0_SIZE 1
+#define SM_INT1_OFFSET 1
+#define SM_INT1_SIZE 1
+#define SM_INT2_OFFSET 2
+#define SM_INT2_SIZE 1
+#define SM_INT3_OFFSET 3
+#define SM_INT3_SIZE 1
+
+/* Bitfields in EIM_LEVEL */
+
+/* Bitfields in EIM_TEST */
+#define SM_TESTEN_OFFSET 31
+#define SM_TESTEN_SIZE 1
+
+/* Bitfields in EIM_NMIC */
+#define SM_EN_OFFSET 0
+#define SM_EN_SIZE 1
+
+/*
+ * Bit manipulation macros. "name" is a bitfield name without the SM_
+ * prefix; SM_<name>_OFFSET and SM_<name>_SIZE give its position and
+ * width. Masks are built from unsigned 32-bit constants so that fields
+ * reaching bit 31 (e.g. PBBDIV, TESTEN) and the full-width RTC_TOP_VAL
+ * field (SIZE 32) never shift a signed int out of range.
+ */
+/* Right-aligned mask of SM_<name>_SIZE one-bits; valid for SIZE 1..32 */
+#define SM_MASK(name) (0xffffffffU >> (32 - SM_##name##_SIZE))
+/* Mask with only the lowest bit of the field set */
+#define SM_BIT(name) (1U << SM_##name##_OFFSET)
+/* Truncate value to the field width and shift it into position */
+#define SM_BF(name,value) (((value) & SM_MASK(name)) << SM_##name##_OFFSET)
+/* Extract the field from a register value, right-aligned */
+#define SM_BFEXT(name,value) (((value) >> SM_##name##_OFFSET) & SM_MASK(name))
+/* Return old with the field replaced by value */
+#define SM_BFINS(name,value,old) (((old) & ~(SM_MASK(name) << SM_##name##_OFFSET)) | SM_BF(name,value))
+
+/*
+ * Register access macros: readl()/writel() on register SM_<reg>,
+ * addressed relative to (port)->regs.
+ */
+#define sm_readl(port,reg) readl((port)->regs + SM_##reg)
+#define sm_writel(port,reg,value) writel((value), (port)->regs + SM_##reg)
+
+#endif /* __ASM_AVR32_SM_H__ */
diff --git a/arch/avr32/mm/Makefile b/arch/avr32/mm/Makefile
new file mode 100644
index 000000000000..0066491f90d4
--- /dev/null
+++ b/arch/avr32/mm/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the Linux/AVR32 kernel.
+#
+
+# Objects built unconditionally (obj-y) from this directory
+obj-y += init.o clear_page.o copy_page.o dma-coherent.o
+obj-y += ioremap.o cache.o fault.o tlb.o
diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c
new file mode 100644
index 000000000000..450515b245a0
--- /dev/null
+++ b/arch/avr32/mm/cache.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/highmem.h>
+#include <linux/unistd.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+/*
+ * Largest length sys_cacheflush() accepts from a caller without
+ * CAP_SYS_ADMIN; anything longer requires that capability. The value
+ * is completely arbitrary.
+ */
+#define CACHEFLUSH_MAX_LEN 1024
+
+/*
+ * Invalidate every data cache line that overlaps [start, start + size).
+ * The range is widened to whole cache lines, so lines only partly
+ * inside the region are invalidated in full - you asked for it, you
+ * got it.
+ */
+void invalidate_dcache_region(void *start, size_t size)
+{
+	unsigned long line = boot_cpu_data.dcache.linesz;
+	unsigned long mask = line - 1;
+	unsigned long addr = (unsigned long)start & ~mask;
+	unsigned long stop = ((unsigned long)start + size + mask) & ~mask;
+
+	while (addr < stop) {
+		invalidate_dcache_line((void *)addr);
+		addr += line;
+	}
+}
+
+/*
+ * Clean every data cache line that overlaps [start, start + size),
+ * then flush the write buffer.
+ */
+void clean_dcache_region(void *start, size_t size)
+{
+	unsigned long line = boot_cpu_data.dcache.linesz;
+	unsigned long mask = line - 1;
+	unsigned long addr = (unsigned long)start & ~mask;
+	unsigned long stop = ((unsigned long)start + size + mask) & ~mask;
+
+	while (addr < stop) {
+		clean_dcache_line((void *)addr);
+		addr += line;
+	}
+
+	flush_write_buffer();
+}
+
+/*
+ * Flush every data cache line that overlaps [start, start + size),
+ * then flush the write buffer.
+ */
+void flush_dcache_region(void *start, size_t size)
+{
+	unsigned long line = boot_cpu_data.dcache.linesz;
+	unsigned long mask = line - 1;
+	unsigned long addr = (unsigned long)start & ~mask;
+	unsigned long stop = ((unsigned long)start + size + mask) & ~mask;
+
+	while (addr < stop) {
+		flush_dcache_line((void *)addr);
+		addr += line;
+	}
+
+	flush_write_buffer();
+}
+
+/*
+ * Invalidate every instruction cache line that overlaps
+ * [start, start + size), using the icache line size.
+ */
+void invalidate_icache_region(void *start, size_t size)
+{
+	unsigned long line = boot_cpu_data.icache.linesz;
+	unsigned long mask = line - 1;
+	unsigned long addr = (unsigned long)start & ~mask;
+	unsigned long stop = ((unsigned long)start + size + mask) & ~mask;
+
+	while (addr < stop) {
+		invalidate_icache_line((void *)addr);
+		addr += line;
+	}
+}
+
+/*
+ * For each line in [start, end): clean the dcache line, then invalidate
+ * the icache line at the same address; finally flush the write buffer.
+ * Steps by the dcache line size and does no rounding of its own.
+ */
+static inline void __flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long step = boot_cpu_data.dcache.linesz;
+	unsigned long addr = start;
+
+	while (addr < end) {
+		clean_dcache_line((void *)addr);
+		invalidate_icache_line((void *)addr);
+		addr += step;
+	}
+
+	flush_write_buffer();
+}
+
+/*
+ * This one is called after a module has been loaded.
+ *
+ * Widens [start, end) to whole dcache lines and hands the result to
+ * __flush_icache_range().
+ */
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long line = boot_cpu_data.dcache.linesz;
+	unsigned long mask = line - 1;
+
+	__flush_icache_range(start & ~mask, (end + mask) & ~mask);
+}
+
+/*
+ * This one is called from do_no_page(), do_swap_page() and install_page().
+ *
+ * For mappings with VM_EXEC set, maps the page with kmap() and runs
+ * __flush_icache_range() over all PAGE_SIZE bytes of it. Does nothing
+ * for non-executable mappings.
+ */
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	if (vma->vm_flags & VM_EXEC) {
+		void *v = kmap(page);
+		__flush_icache_range((unsigned long)v, (unsigned long)v + PAGE_SIZE);
+		/* kunmap() takes the page that was mapped, not its address */
+		kunmap(page);
+	}
+}
+
+/*
+ * This one is used by copy_to_user_page()
+ *
+ * Flushes [addr, addr + len) through flush_icache_range() when the
+ * mapping has VM_EXEC set; the page argument is not used.
+ */
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	flush_icache_range(addr, addr + len);
+}
+
+/*
+ * cacheflush system call. CACHE_IFLUSH is the only supported operation;
+ * it runs flush_icache_range() over [addr, addr + len).
+ *
+ * Returns 0 on success, -EPERM if len exceeds CACHEFLUSH_MAX_LEN and
+ * the caller lacks CAP_SYS_ADMIN, -EFAULT if access_ok() rejects the
+ * range, and -EINVAL for any other operation.
+ */
+asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len)
+{
+	if (len > CACHEFLUSH_MAX_LEN && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!access_ok(VERIFY_WRITE, addr, len))
+		return -EFAULT;
+
+	if (operation != CACHE_IFLUSH)
+		return -EINVAL;
+
+	flush_icache_range((unsigned long)addr, (unsigned long)addr + len);
+	return 0;
+}
diff --git a/arch/avr32/mm/clear_page.S b/arch/avr32/mm/clear_page.S
new file mode 100644
index 000000000000..5d70dca00699
--- /dev/null
+++ b/arch/avr32/mm/clear_page.S
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+/*
+ * clear_page
+ * r12: P1 address (to)
+ *
+ * Fills PAGE_SIZE bytes starting at r12 with zeros. Uses r9 (end
+ * address) and r10/r11 (zero data); r12 is advanced to the end of the
+ * page.
+ */
+ .text
+ .global clear_page
+clear_page:
+ /* r9 = r12 + PAGE_SIZE (subtracting the negated constant) */
+ sub r9, r12, -PAGE_SIZE
+ mov r10, 0
+ mov r11, 0
+ /*
+ * NOTE(review): st.d presumably stores the r10/r11 register pair
+ * (8 bytes) and post-increments r12 - confirm in the AVR32 manual.
+ */
+0: st.d r12++, r10
+ /* loop until r12 reaches the end address */
+ cp r12, r9
+ brne 0b
+ /* return to caller */
+ mov pc, lr
diff --git a/arch/avr32/mm/copy_page.S b/arch/avr32/mm/copy_page.S
new file mode 100644
index 000000000000..c2b3752946b8
--- /dev/null
+++ b/arch/avr32/mm/copy_page.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+/*
+ * copy_page
+ *
+ * r12 to (P1 address)
+ * r11 from (P1 address)
+ * r8-r10 scratch
+ *
+ * Copies (1 << PAGE_SHIFT) bytes from r11 to r12; both pointers are
+ * advanced as the copy proceeds.
+ */
+ .text
+ .global copy_page
+copy_page:
+ /* r10 = r11 + (1 << PAGE_SHIFT): end of the source page */
+ sub r10, r11, -(1 << PAGE_SHIFT)
+ /* pref r11[0] */
+1: /* pref r11[8] */
+ /*
+ * NOTE(review): ld.d/st.d presumably move the r8/r9 register pair
+ * (8 bytes) with post-increment - confirm in the AVR32 manual.
+ */
+ ld.d r8, r11++
+ st.d r12++, r8
+ /* loop while the source pointer is below the end address */
+ cp r11, r10
+ brlo 1b
+ /* return to caller */
+ mov pc, lr
diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c
new file mode 100644
index 000000000000..44ab8a7bdae2
--- /dev/null
+++ b/arch/avr32/mm/dma-coherent.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dma-mapping.h>
+
+#include <asm/addrspace.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Make the CPU cache consistent with memory for a DMA transfer in the
+ * given direction:
+ *   DMA_FROM_DEVICE   - invalidate only
+ *   DMA_TO_DEVICE     - writeback only
+ *   DMA_BIDIRECTIONAL - writeback and invalidate
+ * Any other direction is a BUG(). Addresses in the P2 segment are
+ * uncached and need no work.
+ */
+void dma_cache_sync(void *vaddr, size_t size, int direction)
+{
+	/*
+	 * No need to sync an uncached area
+	 */
+	if (PXSEG(vaddr) == P2SEG)
+		return;
+
+	if (direction == DMA_FROM_DEVICE)
+		dma_cache_inv(vaddr, size);
+	else if (direction == DMA_TO_DEVICE)
+		dma_cache_wback(vaddr, size);
+	else if (direction == DMA_BIDIRECTIONAL)
+		dma_cache_wback_inv(vaddr, size);
+	else
+		BUG();
+}
+EXPORT_SYMBOL(dma_cache_sync);
+
+/*
+ * Allocate PAGE_ALIGN(size) bytes worth of pages for DMA.
+ *
+ * A 2^order block is allocated and split into single pages, its data
+ * cache lines are invalidated, the bus address is stored in *handle and
+ * the pages beyond the requested size are handed back. Returns the
+ * first page, or NULL if the allocation fails (*handle is then left
+ * untouched).
+ */
+static struct page *__dma_alloc(struct device *dev, size_t size,
+				dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *first, *unused, *limit;
+	int order;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	first = alloc_pages(gfp, order);
+	if (first == NULL)
+		return NULL;
+	split_page(first, order);
+
+	/*
+	 * When accessing physical memory with valid cache data, we
+	 * get a cache hit even if the virtual memory region is marked
+	 * as uncached.
+	 *
+	 * Since the memory is newly allocated, there is no point in
+	 * doing a writeback. If the previous owner cares, he should
+	 * have flushed the cache before releasing the memory.
+	 */
+	invalidate_dcache_region(phys_to_virt(page_to_phys(first)), size);
+
+	*handle = page_to_bus(first);
+
+	/*
+	 * Free any unused pages
+	 */
+	limit = first + (1 << order);
+	for (unused = first + (size >> PAGE_SHIFT); unused < limit; unused++)
+		__free_page(unused);
+
+	return first;
+}
+
+/*
+ * Free the PAGE_ALIGN(size) >> PAGE_SHIFT single pages starting at
+ * page. dev and handle are not used.
+ */
+static void __dma_free(struct device *dev, size_t size,
+		       struct page *page, dma_addr_t handle)
+{
+	unsigned long remaining = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	while (remaining--)
+		__free_page(page++);
+}
+
+/*
+ * Allocate memory for coherent DMA. Returns the uncached address of
+ * the allocation and stores its bus address in *handle, or returns
+ * NULL if __dma_alloc() fails.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page = __dma_alloc(dev, size, handle, gfp);
+
+	if (!page)
+		return NULL;
+
+	return phys_to_uncached(page_to_phys(page));
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * Release memory obtained from dma_alloc_coherent(). cpu_addr is the
+ * uncached address that was returned; it is converted to its cached
+ * alias to look up the pages. BUG()s if that alias is not a valid
+ * kernel virtual address.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *cpu_addr, dma_addr_t handle)
+{
+	void *cached;
+
+	cached = phys_to_cached(uncached_to_phys(cpu_addr));
+
+	pr_debug("dma_free_coherent addr %p (phys %08lx) size %u\n",
+		 cpu_addr, (unsigned long)handle, (unsigned)size);
+	BUG_ON(!virt_addr_valid(cached));
+
+	__dma_free(dev, size, virt_to_page(cached), handle);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+/* Write-combining variants; compiled out by the #if 0 below */
+#if 0
+/*
+ * Allocate DMA memory and map it with _PAGE_BUFFER set.
+ * NOTE(review): the result of __dma_alloc() is not checked for NULL
+ * before page_to_phys() - fix before enabling this code.
+ */
+void *dma_alloc_writecombine(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp)
+{
+ struct page *page;
+
+ page = __dma_alloc(dev, size, handle, gfp);
+
+ /* Now, map the page into P3 with write-combining turned on */
+ return __ioremap(page_to_phys(page), size, _PAGE_BUFFER);
+}
+EXPORT_SYMBOL(dma_alloc_writecombine);
+
+/* Unmap cpu_addr and free the pages found through the bus address */
+void dma_free_writecombine(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t handle)
+{
+ struct page *page;
+
+ iounmap(cpu_addr);
+
+ page = bus_to_page(handle);
+ __dma_free(dev, size, page, handle);
+}
+EXPORT_SYMBOL(dma_free_writecombine);
+#endif
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
new file mode 100644
index 000000000000..678557260a35
--- /dev/null
+++ b/arch/avr32/mm/fault.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/sh/mm/fault.c:
+ * Copyright (C) 1999 Niibe Yutaka
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+
+#include <asm/kdebug.h>
+#include <asm/mmu_context.h>
+#include <asm/sysreg.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+
+#ifdef DEBUG
+/*
+ * Print up to 16 bytes of code starting at `pc' as two-digit hex
+ * values. The dump stops early at the first byte that cannot be read.
+ */
+static void dump_code(unsigned long pc)
+{
+	unsigned char *p = (unsigned char *)pc;
+	/* unsigned so that bytes >= 0x80 are not sign-extended by %02x */
+	unsigned char val;
+	int i;
+
+	printk(KERN_DEBUG "Code:");
+	for (i = 0; i < 16; i++) {
+		if (__get_user(val, p + i))
+			break;
+		printk(" %02x", val);
+	}
+	printk("\n");
+}
+#endif
+
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+/* Add `nb' to the chain that is called on every page fault */
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+	struct atomic_notifier_head *chain = &notify_page_fault_chain;
+
+	return atomic_notifier_chain_register(chain, nb);
+}
+
+/* Remove `nb' from the page fault notifier chain */
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+	struct atomic_notifier_head *chain = &notify_page_fault_chain;
+
+	return atomic_notifier_chain_unregister(chain, nb);
+}
+
+/*
+ * Run the page fault notifier chain with the faulting registers and
+ * trap number. `sig' is accepted but not forwarded to the notifiers.
+ */
+static inline int notify_page_fault(enum die_val val, struct pt_regs *regs,
+				    int trap, int sig)
+{
+	struct die_args info = {
+		.trapnr	= trap,
+		.regs	= regs,
+	};
+
+	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &info);
+}
+#else
+/* Without kprobes there is no notifier chain; report "not handled" */
+static inline int notify_page_fault(enum die_val val, struct pt_regs *regs,
+				    int trap, int sig)
+{
+	return NOTIFY_DONE;
+}
+#endif
+
+/*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ *
+ * ecr is the Exception Cause Register. Possible values are:
+ * 5: Page not found (instruction access)
+ * 6: Protection fault (instruction access)
+ * 12: Page not found (read access)
+ * 13: Page not found (write access)
+ * 14: Protection fault (read access)
+ * 15: Protection fault (write access)
+ *
+ * regs is the register state saved when the fault was taken. The
+ * faulting address is not passed in; it is read from the TLBEAR
+ * system register.
+ *
+ * Outcomes, as implemented below:
+ * - handle_mm_fault() resolves the fault and we simply return;
+ * - a user-mode access to a bad area gets SIGSEGV;
+ * - a kernel-mode fault at a pc listed in the exception tables
+ * resumes at the fixup address;
+ * - any other kernel-mode fault prints the page table entries for
+ * the address and ends in die()/do_exit();
+ * - VM_FAULT_SIGBUS sends SIGBUS; VM_FAULT_OOM kills the task, except
+ * that PID 1 yields and retries instead.
+ */
+asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
+{
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ const struct exception_table_entry *fixup;
+ unsigned long address;
+ unsigned long page;
+ int writeaccess = 0;
+
+ if (notify_page_fault(DIE_PAGE_FAULT, regs,
+ ecr, SIGSEGV) == NOTIFY_STOP)
+ return; /* a notifier asked us to stop processing this fault */
+
+ address = sysreg_read(TLBEAR);
+
+ tsk = current;
+ mm = tsk->mm;
+
+ /*
+ * If we're in an interrupt or have no user context, we must
+ * not take the fault...
+ */
+ if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+ goto no_context;
+
+ local_irq_enable();
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address) /* address lies inside this vma */
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (expand_stack(vma, address)) /* non-zero is treated as failure */
+ goto bad_area;
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so we
+ * can handle it...
+ */
+good_area:
+ //pr_debug("good area: vm_flags = 0x%lx\n", vma->vm_flags);
+ switch (ecr) {
+ case ECR_PROTECTION_X:
+ case ECR_TLB_MISS_X:
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
+ break;
+ case ECR_PROTECTION_R:
+ case ECR_TLB_MISS_R:
+ if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) /* any access right permits a read */
+ goto bad_area;
+ break;
+ case ECR_PROTECTION_W:
+ case ECR_TLB_MISS_W:
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ writeaccess = 1;
+ break;
+ default:
+ panic("Unhandled case %lu in do_page_fault!", ecr);
+ }
+
+ /*
+ * If for any reason at all we couldn't handle the fault, make
+ * sure we exit gracefully rather than endlessly redo the
+ * fault.
+ */
+survive:
+ switch (handle_mm_fault(mm, vma, address, writeaccess)) {
+ case VM_FAULT_MINOR:
+ tsk->min_flt++;
+ break;
+ case VM_FAULT_MAJOR:
+ tsk->maj_flt++;
+ break;
+ case VM_FAULT_SIGBUS:
+ goto do_sigbus;
+ case VM_FAULT_OOM:
+ goto out_of_memory;
+ default:
+ BUG();
+ }
+
+ up_read(&mm->mmap_sem);
+ return;
+
+ /*
+ * Something tried to access memory that isn't in our memory
+ * map. Fix it, but check if it's kernel or user first...
+ */
+bad_area:
+ pr_debug("Bad area [%s:%u]: addr %08lx, ecr %lu\n",
+ tsk->comm, tsk->pid, address, ecr);
+
+ up_read(&mm->mmap_sem);
+
+ if (user_mode(regs)) {
+ /* Hmm...we have to pass address and ecr somehow... */
+ /* tsk->thread.address = address;
+ tsk->thread.error_code = ecr; */
+#ifdef DEBUG
+ show_regs(regs);
+ dump_code(regs->pc);
+
+ page = sysreg_read(PTBR);
+ printk("ptbr = %08lx", page);
+ if (page) {
+ page = ((unsigned long *)page)[address >> 22];
+ printk(" pgd = %08lx", page);
+ if (page & _PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ address &= 0x003ff000;
+ page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
+ printk(" pte = %08lx\n", page);
+ }
+ }
+#endif
+ pr_debug("Sending SIGSEGV to PID %d...\n",
+ tsk->pid);
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+
+no_context: /* reached without mmap_sem held on every path */
+ pr_debug("No context\n");
+
+ /* Are we prepared to handle this kernel fault? */
+ fixup = search_exception_tables(regs->pc);
+ if (fixup) {
+ regs->pc = fixup->fixup;
+ pr_debug("Found fixup at %08lx\n", fixup->fixup);
+ return;
+ }
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have
+ * to terminate things with extreme prejudice.
+ */
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT
+ "Unable to handle kernel NULL pointer dereference");
+ else
+ printk(KERN_ALERT
+ "Unable to handle kernel paging request");
+ printk(" at virtual address %08lx\n", address);
+ printk(KERN_ALERT "pc = %08lx\n", regs->pc);
+
+ page = sysreg_read(PTBR);
+ printk(KERN_ALERT "ptbr = %08lx", page);
+ if (page) {
+ page = ((unsigned long *)page)[address >> 22]; /* first-level entry for the address */
+ printk(" pgd = %08lx", page);
+ if (page & _PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ address &= 0x003ff000; /* NOTE: `address' is clobbered from here on */
+ page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
+ printk(" pte = %08lx\n", page);
+ }
+ }
+ die("\nOops", regs, ecr);
+ do_exit(SIGKILL);
+
+ /*
+ * We ran out of memory, or some other thing happened to us
+ * that made us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+ printk("Out of memory\n");
+ up_read(&mm->mmap_sem);
+ if (current->pid == 1) { /* PID 1 is never killed here: yield and retry */
+ yield();
+ down_read(&mm->mmap_sem);
+ goto survive;
+ }
+ printk("VM: Killing process %s\n", tsk->comm);
+ if (user_mode(regs))
+ do_exit(SIGKILL);
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ /*
+ * Send a sigbus, regardless of whether we were in kernel or
+ * user mode.
+ */
+ /* address, error_code, trap_no, ... */
+#ifdef DEBUG
+ show_regs(regs);
+ dump_code(regs->pc);
+#endif
+ pr_debug("Sending SIGBUS to PID %d...\n", tsk->pid);
+ force_sig(SIGBUS, tsk);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs))
+ goto no_context;
+}
+
+/*
+ * Report a bus error at physical address `addr', dump the data TLB
+ * and terminate through die()/do_exit().
+ */
+asmlinkage void do_bus_error(unsigned long addr, int write_access,
+			     struct pt_regs *regs)
+{
+	const char *access_type;
+
+	if (write_access)
+		access_type = "write";
+	else
+		access_type = "read";
+
+	printk(KERN_ALERT
+	       "Bus error at physical address 0x%08lx (%s access)\n",
+	       addr, access_type);
+
+	printk(KERN_INFO "DTLB dump:\n");
+	dump_dtlb();
+
+	die("Bus Error", regs, write_access);
+	do_exit(SIGKILL);
+}
+
+/*
+ * This functionality is currently not possible to implement because
+ * we're using segmentation to ensure a fixed mapping of the kernel
+ * virtual address space.
+ *
+ * It would be possible to implement this, but it would require us to
+ * disable segmentation at startup and load the kernel mappings into
+ * the TLB like any other pages. There will be lots of trickery to
+ * avoid recursive invocation of the TLB miss handler, though...
+ *
+ * Until then kernel_map_pages() is an empty stub: page, numpages and
+ * enable are all ignored and no mapping is changed.
+ */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+
+}
+EXPORT_SYMBOL(kernel_map_pages);
+#endif
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
new file mode 100644
index 000000000000..3e6c41039808
--- /dev/null
+++ b/arch/avr32/mm/init.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/pfn.h>
+#include <linux/nodemask.h>
+
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+struct page *empty_zero_page;
+
+/*
+ * Cache of MMU context last used.
+ */
+unsigned long mmu_context_cache = NO_CONTEXT;
+
+#define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT)
+#define MAX_LOW_PFN (NODE_DATA(0)->bdata->node_low_pfn)
+
+/*
+ * Print a summary of memory usage: the free-area report followed by
+ * per-category page counts gathered by walking the mem_map of every
+ * online node.
+ */
+void show_mem(void)
+{
+	int total = 0, reserved = 0, cached = 0;
+	int slab = 0, free = 0, shared = 0;
+	pg_data_t *pgdat;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+
+	for_each_online_pgdat(pgdat) {
+		struct page *page, *end;
+
+		page = pgdat->node_mem_map;
+		end = page + pgdat->node_spanned_pages;
+
+		/*
+		 * Test before touching the first page so that a node
+		 * spanning no pages is skipped instead of having one
+		 * struct page read past its (empty) map.
+		 */
+		for (; page < end; page++) {
+			total++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (PageSlab(page))
+				slab++;
+			else if (!page_count(page))
+				free++;
+			else
+				shared += page_count(page) - 1;
+		}
+	}
+
+	printk ("%d pages of RAM\n", total);
+	printk ("%d free pages\n", free);
+	printk ("%d reserved pages\n", reserved);
+	printk ("%d slab pages\n", slab);
+	printk ("%d pages shared\n", shared);
+	printk ("%d pages swap cached\n", cached);
+}
+
+/*
+ * Print the heading `what' followed by one "start - end" line for
+ * every range on the list starting at `mem'.
+ */
+static void __init print_memory_map(const char *what,
+				    struct tag_mem_range *mem)
+{
+	struct tag_mem_range *range = mem;
+
+	printk ("%s:\n", what);
+	while (range) {
+		printk (" %08lx - %08lx\n",
+			(unsigned long)range->addr,
+			(unsigned long)(range->addr + range->size));
+		range = range->next;
+	}
+}
+
+#define MAX_LOWMEM HIGHMEM_START
+#define MAX_LOWMEM_PFN PFN_DOWN(MAX_LOWMEM)
+
+/*
+ * Sort a list of memory regions in-place by ascending address.
+ *
+ * We're using bubble sort because we only have singly linked lists
+ * with few elements.
+ *
+ * pmem points at the list head pointer, which is rewritten when the
+ * first element changes. During a pass, a and b are the addresses of
+ * the two link fields that point at a pair of adjacent nodes, so a
+ * swap is done purely by rewriting links. Passes repeat until one
+ * completes without a swap. An empty list is left untouched.
+ */
+static void __init sort_mem_list(struct tag_mem_range **pmem)
+{
+ int done;
+ struct tag_mem_range **a, **b;
+
+ if (!*pmem)
+ return;
+
+ do {
+ done = 1;
+ a = pmem, b = &(*pmem)->next;
+ while (*b) {
+ if ((*a)->addr > (*b)->addr) {
+ struct tag_mem_range *tmp;
+ tmp = (*b)->next; /* remember what followed the second node */
+ (*b)->next = *a; /* second node now points at the first */
+ *a = *b; /* incoming link now reaches the second node */
+ *b = tmp; /* first node inherits the old tail */
+ done = 0;
+ }
+ a = &(*a)->next;
+ b = &(*a)->next;
+ }
+ } while (!done);
+}
+
+/*
+ * Find a free memory region large enough for storing the
+ * bootmem bitmap.
+ *
+ * `mem' is the memory bank the bitmap must live in. Returns the page
+ * frame number at which the bitmap can be placed, or ~0UL if no
+ * suitable region exists inside the bank.
+ */
+static unsigned long __init
+find_bootmap_pfn(const struct tag_mem_range *mem)
+{
+	unsigned long bootmap_pages, bootmap_len;
+	unsigned long bootmap_pfn;
+	unsigned long node_pages = PFN_UP(mem->size);
+	unsigned long bootmap_addr = mem->addr;
+	struct tag_mem_range *reserved = mem_reserved;
+	struct tag_mem_range *ramdisk = mem_ramdisk;
+	unsigned long kern_start = virt_to_phys(_stext);
+	unsigned long kern_end = virt_to_phys(_end);
+
+	bootmap_pages = bootmem_bootmap_pages(node_pages);
+	bootmap_len = bootmap_pages << PAGE_SHIFT;
+
+	/*
+	 * Find a large enough region without reserved pages for
+	 * storing the bootmem bitmap. We can take advantage of the
+	 * fact that all lists have been sorted.
+	 *
+	 * We have to check explicitly reserved regions as well as the
+	 * kernel image and any RAMDISK images...
+	 *
+	 * Oh, and we have to make sure we don't overwrite the taglist
+	 * since we're going to use it until the bootmem allocator is
+	 * fully up and running.
+	 */
+	while (1) {
+		/* Overlaps the kernel image: start right after it */
+		if ((bootmap_addr < kern_end) &&
+		    ((bootmap_addr + bootmap_len) > kern_start))
+			bootmap_addr = kern_end;
+
+		/* Skip reserved regions that end at or below the candidate */
+		while (reserved &&
+		       (bootmap_addr >= (reserved->addr + reserved->size)))
+			reserved = reserved->next;
+
+		if (reserved &&
+		    ((bootmap_addr + bootmap_len) >= reserved->addr)) {
+			bootmap_addr = reserved->addr + reserved->size;
+			continue;
+		}
+
+		/* Same treatment for RAMDISK images */
+		while (ramdisk &&
+		       (bootmap_addr >= (ramdisk->addr + ramdisk->size)))
+			ramdisk = ramdisk->next;
+
+		if (!ramdisk ||
+		    ((bootmap_addr + bootmap_len) < ramdisk->addr))
+			break;
+
+		bootmap_addr = ramdisk->addr + ramdisk->size;
+	}
+
+	/*
+	 * The bitmap starts at the first page boundary at or above
+	 * bootmap_addr. Convert that page frame number back to a byte
+	 * address before adding the byte length, so that the comparison
+	 * against the end of the bank is bytes against bytes.
+	 */
+	bootmap_pfn = PFN_UP(bootmap_addr);
+	if ((PFN_PHYS(bootmap_pfn) + bootmap_len) >= (mem->addr + mem->size))
+		return ~0UL;
+
+	return bootmap_pfn;
+}
+/*
+ * Set up the boot-time (bootmem) allocator from the memory lists
+ * mem_phys, mem_reserved and mem_ramdisk.
+ *
+ * The lists are sorted and printed, then the first bank of mem_phys
+ * (the loop below runs at most once) is registered as node 0: a place
+ * for the bootmem bitmap is found, every page of the bank is freed
+ * into the allocator, and the kernel image, the bitmap itself, the
+ * RAMDISK images and the reserved regions are reserved again. Finally
+ * the node is marked online.
+ *
+ * NOTE(review): mem_phys->next is dereferenced before the loop, so
+ * mem_phys is presumably guaranteed non-NULL by the caller -- confirm.
+ * NOTE(review): initrd_start/initrd_end are taken from the head of
+ * mem_ramdisk before that list is sorted -- confirm this is intended.
+ */
+void __init setup_bootmem(void)
+{
+ unsigned bootmap_size;
+ unsigned long first_pfn, bootmap_pfn, pages;
+ unsigned long max_pfn, max_low_pfn;
+ unsigned long kern_start = virt_to_phys(_stext);
+ unsigned long kern_end = virt_to_phys(_end);
+ unsigned node = 0;
+ struct tag_mem_range *bank, *res;
+
+ sort_mem_list(&mem_phys);
+ sort_mem_list(&mem_reserved);
+
+ print_memory_map("Physical memory", mem_phys);
+ print_memory_map("Reserved memory", mem_reserved);
+
+ nodes_clear(node_online_map);
+
+ if (mem_ramdisk) {
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_start = __va(mem_ramdisk->addr);
+ initrd_end = initrd_start + mem_ramdisk->size;
+
+ print_memory_map("RAMDISK images", mem_ramdisk);
+ if (mem_ramdisk->next)
+ printk(KERN_WARNING
+ "Warning: Only the first RAMDISK image "
+ "will be used\n");
+ sort_mem_list(&mem_ramdisk);
+#else
+ printk(KERN_WARNING "RAM disk image present, but "
+ "no initrd support in kernel!\n");
+#endif
+ }
+
+ if (mem_phys->next)
+ printk(KERN_WARNING "Only using first memory bank\n");
+
+ for (bank = mem_phys; bank; bank = NULL) { /* at most one iteration */
+ first_pfn = PFN_UP(bank->addr);
+ max_low_pfn = max_pfn = PFN_DOWN(bank->addr + bank->size);
+ bootmap_pfn = find_bootmap_pfn(bank);
+ if (bootmap_pfn > max_pfn) /* also catches the ~0UL failure value */
+ panic("No space for bootmem bitmap!\n");
+
+ if (max_low_pfn > MAX_LOWMEM_PFN) {
+ max_low_pfn = MAX_LOWMEM_PFN;
+#ifndef CONFIG_HIGHMEM
+ /*
+ * Lowmem is memory that can be addressed
+ * directly through P1/P2
+ */
+ printk(KERN_WARNING
+ "Node %u: Only %ld MiB of memory will be used.\n",
+ node, MAX_LOWMEM >> 20);
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+#else
+#error HIGHMEM is not supported by AVR32 yet
+#endif
+ }
+
+ /* Initialize the boot-time allocator with low memory only. */
+ bootmap_size = init_bootmem_node(NODE_DATA(node), bootmap_pfn,
+ first_pfn, max_low_pfn);
+
+ printk("Node %u: bdata = %p, bdata->node_bootmem_map = %p\n",
+ node, NODE_DATA(node)->bdata,
+ NODE_DATA(node)->bdata->node_bootmem_map);
+
+ /*
+ * Register fully available RAM pages with the bootmem
+ * allocator.
+ */
+ pages = max_low_pfn - first_pfn;
+ free_bootmem_node (NODE_DATA(node), PFN_PHYS(first_pfn),
+ PFN_PHYS(pages));
+
+ /*
+ * Reserve space for the kernel image (if present in
+ * this node)...
+ */
+ if ((kern_start >= PFN_PHYS(first_pfn)) &&
+ (kern_start < PFN_PHYS(max_pfn))) {
+ printk("Node %u: Kernel image %08lx - %08lx\n",
+ node, kern_start, kern_end);
+ reserve_bootmem_node(NODE_DATA(node), kern_start,
+ kern_end - kern_start);
+ }
+
+ /* ...the bootmem bitmap... */
+ reserve_bootmem_node(NODE_DATA(node),
+ PFN_PHYS(bootmap_pfn),
+ bootmap_size);
+
+ /* ...any RAMDISK images... */
+ for (res = mem_ramdisk; res; res = res->next) {
+ if (res->addr > PFN_PHYS(max_pfn)) /* list is sorted: nothing further can be in this bank */
+ break;
+
+ if (res->addr >= PFN_PHYS(first_pfn)) {
+ printk("Node %u: RAMDISK %08lx - %08lx\n",
+ node,
+ (unsigned long)res->addr,
+ (unsigned long)(res->addr + res->size));
+ reserve_bootmem_node(NODE_DATA(node),
+ res->addr, res->size);
+ }
+ }
+
+ /* ...and any other reserved regions. */
+ for (res = mem_reserved; res; res = res->next) {
+ if (res->addr > PFN_PHYS(max_pfn))
+ break;
+
+ if (res->addr >= PFN_PHYS(first_pfn)) {
+ printk("Node %u: Reserved %08lx - %08lx\n",
+ node,
+ (unsigned long)res->addr,
+ (unsigned long)(res->addr + res->size));
+ reserve_bootmem_node(NODE_DATA(node),
+ res->addr, res->size);
+ }
+ }
+
+ node_set_online(node);
+ }
+}
+
+/*
+ * paging_init() sets up the page tables
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ * NOTE(review): no explicit unmapping of address 0 is visible in the
+ * body below -- confirm whether the sentence above still applies.
+ *
+ * Steps performed, in order: install the exception vector base
+ * (EVBA) and clear the SR_EM bit in the status register; allocate the
+ * zero page from bootmem; clear swapper_pg_dir, load it into PTBR and
+ * enable the MMU; initialise the zone data of every online node with
+ * all pages in ZONE_NORMAL; publish mem_map and the zeroed
+ * empty_zero_page.
+ */
+void __init paging_init(void)
+{
+ extern unsigned long _evba;
+ void *zero_page;
+ int nid;
+
+ /*
+ * Make sure we can handle exceptions before enabling
+ * paging. Not that we should ever _get_ any exceptions this
+ * early, but you never know...
+ */
+ printk("Exception vectors start at %p\n", &_evba);
+ sysreg_write(EVBA, (unsigned long)&_evba);
+
+ /*
+ * Since we are ready to handle exceptions now, we should let
+ * the CPU generate them...
+ */
+ __asm__ __volatile__ ("csrf %0" : : "i"(SR_EM_BIT));
+
+ /*
+ * Allocate the zero page. The allocator will panic if it
+ * can't satisfy the request, so no need to check.
+ */
+ zero_page = alloc_bootmem_low_pages_node(NODE_DATA(0),
+ PAGE_SIZE);
+
+ {
+ pgd_t *pg_dir;
+ int i;
+
+ pg_dir = swapper_pg_dir;
+ sysreg_write(PTBR, (unsigned long)pg_dir);
+
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ pgd_val(pg_dir[i]) = 0;
+
+ enable_mmu();
+ printk ("CPU: Paging enabled\n");
+ }
+
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ unsigned long zones_size[MAX_NR_ZONES];
+ unsigned long low, start_pfn;
+
+ start_pfn = pgdat->bdata->node_boot_start;
+ start_pfn >>= PAGE_SHIFT; /* byte address -> page frame number */
+ low = pgdat->bdata->node_low_pfn;
+
+ memset(zones_size, 0, sizeof(zones_size));
+ zones_size[ZONE_NORMAL] = low - start_pfn; /* every page of the node goes to ZONE_NORMAL */
+
+ printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
+ nid, start_pfn, low);
+
+ free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL);
+
+ printk("Node %u: mem_map starts at %p\n",
+ pgdat->node_id, pgdat->node_mem_map);
+ }
+
+ mem_map = NODE_DATA(0)->node_mem_map;
+
+ memset(zero_page, 0, PAGE_SIZE);
+ empty_zero_page = virt_to_page(zero_page);
+ flush_dcache_page(empty_zero_page);
+}
+
+/*
+ * Hand all bootmem pages of every online node over to the page
+ * allocator, update the global page counters (num_physpages,
+ * totalram_pages, high_memory, max_mapnr) and print the memory
+ * summary line.
+ */
+void __init mem_init(void)
+{
+	int codesize, reservedpages, datasize, initsize;
+	int nid;
+
+	reservedpages = 0;
+	high_memory = NULL;
+
+	/* this will put all low memory onto the freelists */
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		unsigned long node_pages = 0;
+		unsigned long i;
+		void *node_high_memory;
+
+		num_physpages += pgdat->node_present_pages;
+
+		if (pgdat->node_spanned_pages != 0)
+			node_pages = free_all_bootmem_node(pgdat);
+
+		totalram_pages += node_pages;
+
+		/*
+		 * Count reserved pages over the whole span of the node.
+		 * node_pages is only the number of pages that were
+		 * freed, so using it as the bound would miss reserved
+		 * pages located towards the end of the node's mem_map.
+		 */
+		for (i = 0; i < pgdat->node_spanned_pages; i++)
+			if (PageReserved(pgdat->node_mem_map + i))
+				reservedpages++;
+
+		node_high_memory = (void *)((pgdat->node_start_pfn
+					     + pgdat->node_spanned_pages)
+					    << PAGE_SHIFT);
+		if (node_high_memory > high_memory)
+			high_memory = node_high_memory;
+	}
+
+	max_mapnr = MAP_NR(high_memory);
+
+	codesize = (unsigned long)_etext - (unsigned long)_text;
+	datasize = (unsigned long)_edata - (unsigned long)_data;
+	initsize = (unsigned long)__init_end - (unsigned long)__init_begin;
+
+	printk ("Memory: %luk/%luk available (%dk kernel code, "
+		"%dk reserved, %dk data, %dk init)\n",
+		(unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+		totalram_pages << (PAGE_SHIFT - 10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT - 10),
+		datasize >> 10,
+		initsize >> 10);
+}
+
+/*
+ * Release the pages in the virtual range [addr, end) to the page
+ * allocator and account for them in totalram_pages. When the range is
+ * at least 1K long and a label `s' is given, log what was freed.
+ */
+static inline void free_area(unsigned long addr, unsigned long end, char *s)
+{
+	unsigned int size = (end - addr) >> 10;
+	unsigned long cur = addr;
+
+	while (cur < end) {
+		struct page *pg = virt_to_page(cur);
+
+		ClearPageReserved(pg);
+		init_page_count(pg);
+		free_page(cur);
+		totalram_pages++;
+
+		cur += PAGE_SIZE;
+	}
+
+	if (!size || !s)
+		return;
+
+	printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n",
+	       s, size, end - (size << 10), end);
+}
+
+/* Release the memory between __init_begin and __init_end */
+void free_initmem(void)
+{
+	unsigned long first = (unsigned long)__init_begin;
+	unsigned long limit = (unsigned long)__init_end;
+
+	free_area(first, limit, "init");
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+/* Non-zero once "keepinitrd" was seen on the command line */
+static int keep_initrd;
+
+/* Free the initrd range [start, end) unless "keepinitrd" was given */
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (keep_initrd)
+		return;
+
+	free_area(start, end, "initrd");
+}
+
+/* Handler for the "keepinitrd" option; its argument is ignored */
+static int __init keepinitrd_setup(char *__unused)
+{
+	keep_initrd = 1;
+
+	return 1;
+}
+
+__setup("keepinitrd", keepinitrd_setup);
+#endif
diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c
new file mode 100644
index 000000000000..536021877df6
--- /dev/null
+++ b/arch/avr32/mm/ioremap.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/addrspace.h>
+
+/*
+ * Fill consecutive PTEs, starting at `pte', so that the virtual range
+ * [address, end) maps the physical pages starting at `phys_addr' with
+ * protection `prot'. At least one PTE is always written. Warns about
+ * any PTE that is already in use. Always returns 0.
+ */
+static inline int remap_area_pte(pte_t *pte, unsigned long address,
+				 unsigned long end, unsigned long phys_addr,
+				 pgprot_t prot)
+{
+	unsigned long pfn = phys_addr >> PAGE_SHIFT;
+
+	for (;;) {
+		WARN_ON(!pte_none(*pte));
+		set_pte(pte, pfn_pte(pfn, prot));
+
+		pte++;
+		pfn++;
+		address += PAGE_SIZE;
+
+		/* stop on address wrap-around or at the end of the range */
+		if (!address || address >= end)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Map the virtual range [address, end) to `phys_addr' at the PMD
+ * level, allocating page tables as needed. Returns 0 on success or
+ * -ENOMEM if a page table could not be allocated.
+ */
+static inline int remap_area_pmd(pmd_t *pmd, unsigned long address,
+				 unsigned long end, unsigned long phys_addr,
+				 pgprot_t prot)
+{
+	unsigned long next;
+
+	/* keep only the virtual-to-physical offset */
+	phys_addr -= address;
+
+	do {
+		pte_t *pte = pte_alloc_kernel(pmd, address);
+		if (!pte)
+			return -ENOMEM;
+
+		/*
+		 * Never go past the end of the requested range (or past
+		 * a wrap-around): otherwise every PTE up to the next PMD
+		 * boundary would be populated, not just the ones asked
+		 * for.
+		 */
+		next = (address + PMD_SIZE) & PMD_MASK;
+		if (next < address || next > end)
+			next = end;
+		if (remap_area_pte(pte, address, next,
+				   address + phys_addr, prot))
+			return -ENOMEM;
+
+		address = next;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+/*
+ * Map the virtual range [address, end) to `phys_addr' at the PUD
+ * level, allocating PMDs as needed. Returns 0 on success or -ENOMEM.
+ */
+static int remap_area_pud(pud_t *pud, unsigned long address,
+			  unsigned long end, unsigned long phys_addr,
+			  pgprot_t prot)
+{
+	unsigned long next;
+
+	/* keep only the virtual-to-physical offset */
+	phys_addr -= address;
+
+	do {
+		pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
+		if (!pmd)
+			return -ENOMEM;
+
+		/* clamp to the requested range, as remap_area_pages() does */
+		next = (address + PUD_SIZE) & PUD_MASK;
+		if (next < address || next > end)
+			next = end;
+		if (remap_area_pmd(pmd, address, next,
+				   phys_addr + address, prot))
+			return -ENOMEM;
+
+		address = next;
+		pud++;
+	} while (address && address < end);
+
+	return 0;
+}
+
+/*
+ * Map `size' bytes of physical memory at `phys_addr' to the kernel
+ * virtual address `address' with protection `prot'. Caches are
+ * flushed before and the TLB after the page tables are updated.
+ * Returns 0 on success or -ENOMEM if a page table allocation failed.
+ *
+ * init_mm.page_table_lock is deliberately not held here: the
+ * pud/pmd/pte allocation helpers called below may sleep and take that
+ * lock themselves when they install a new table.
+ */
+static int remap_area_pages(unsigned long address, unsigned long phys_addr,
+			    size_t size, pgprot_t prot)
+{
+	unsigned long end = address + size;
+	unsigned long next;
+	pgd_t *pgd;
+	int err = 0;
+
+	/* keep only the virtual-to-physical offset */
+	phys_addr -= address;
+
+	pgd = pgd_offset_k(address);
+	flush_cache_all();
+	BUG_ON(address >= end);
+
+	do {
+		pud_t *pud = pud_alloc(&init_mm, pgd, address);
+
+		err = -ENOMEM;
+		if (!pud)
+			break;
+
+		/* clamp to the requested range and guard against wrap */
+		next = (address + PGDIR_SIZE) & PGDIR_MASK;
+		if (next < address || next > end)
+			next = end;
+		err = remap_area_pud(pud, address, next,
+				     phys_addr + address, prot);
+		if (err)
+			break;
+
+		address = next;
+		pgd++;
+	} while (address && (address < end));
+
+	flush_tlb_all();
+	return err;
+}
+
+/*
+ * Re-map an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access physical
+ * memory directly.
+ *
+ * Three cases are handled, in this order:
+ * - phys_addr at or above P4SEG with flags == 0 is returned as is;
+ * - a phys_addr that survives a round trip through P2SEGADDR() and
+ * PHYSADDR() is returned as its P2 segment address, without
+ * creating a mapping (note that flags are not looked at here);
+ * - anything else gets a page-aligned vm area that is mapped with
+ * remap_area_pages(); the sub-page offset of phys_addr is added
+ * back to the returned pointer.
+ *
+ * Returns NULL for a zero size, an address range that wraps around,
+ * or when no vm area or page table could be obtained.
+ */
+void __iomem *__ioremap(unsigned long phys_addr, size_t size,
+ unsigned long flags)
+{
+ void *addr;
+ struct vm_struct *area;
+ unsigned long offset, last_addr;
+ pgprot_t prot;
+
+ /*
+ * Check if we can simply use the P4 segment. This area is
+ * uncacheable, so if caching/buffering is requested, we can't
+ * use it.
+ */
+ if ((phys_addr >= P4SEG) && (flags == 0))
+ return (void __iomem *)phys_addr;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+ /*
+ * XXX: When mapping regular RAM, we'd better make damn sure
+ * it's never used for anything else. But this is really the
+ * caller's responsibility...
+ */
+ if (PHYSADDR(P2SEGADDR(phys_addr)) == phys_addr)
+ return (void __iomem *)P2SEGADDR(phys_addr);
+
+ /* Mappings have to be page-aligned */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY
+ | _PAGE_ACCESSED | _PAGE_TYPE_SMALL | flags);
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = area->addr;
+ if (remap_area_pages((unsigned long)addr, phys_addr, size, prot)) {
+ vunmap(addr); /* mapping failed: give the vm area back */
+ return NULL;
+ }
+
+ return (void __iomem *)(offset + (char *)addr);
+}
+EXPORT_SYMBOL(__ioremap);
+
+/*
+ * Undo a mapping created by __ioremap().
+ *
+ * Addresses in the P4 and P2 segments are handed out by __ioremap()
+ * without creating a vm area, so there is nothing to tear down for
+ * them. Everything else must be the start of a vm area; if it is not,
+ * an error is logged and nothing is freed.
+ */
+void __iounmap(void __iomem *addr)
+{
+	struct vm_struct *p;
+
+	if ((unsigned long)addr >= P4SEG)
+		return;
+
+	/* P2 addresses were never backed by a vm area either */
+	if (PXSEG(addr) == P2SEG)
+		return;
+
+	p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
+	if (unlikely(!p)) {
+		printk (KERN_ERR "iounmap: bad address %p\n", addr);
+		return;
+	}
+
+	kfree (p);
+}
+EXPORT_SYMBOL(__iounmap);
diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c
new file mode 100644
index 000000000000..5d0523bbe298
--- /dev/null
+++ b/arch/avr32/mm/tlb.c
@@ -0,0 +1,378 @@
+/*
+ * AVR32 TLB operations
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+
+#include <asm/mmu_context.h>
+
+#define _TLBEHI_I 0x100
+
+/*
+ * Print one line describing data TLB entry `index'.
+ *
+ * The entry is read back with the `tlbr' instruction after selecting
+ * it through MMUCR. MMUCR and TLBEHI are restored afterwards, and
+ * interrupts are disabled for the whole sequence.
+ */
+void show_dtlb_entry(unsigned int index)
+{
+	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save;
+	/* local_irq_save() requires an unsigned long */
+	unsigned long flags;
+
+	local_irq_save(flags);
+	mmucr_save = sysreg_read(MMUCR);
+	tlbehi_save = sysreg_read(TLBEHI);
+	mmucr = mmucr_save & 0x13;
+	mmucr |= index << 14;
+	sysreg_write(MMUCR, mmucr);
+
+	asm volatile("tlbr" : : : "memory");
+	cpu_sync_pipeline();
+
+	tlbehi = sysreg_read(TLBEHI);
+	tlbelo = sysreg_read(TLBELO);
+
+	printk("%2u: %c %c %02x %05x %05x %o %o %c %c %c %c\n",
+	       index,
+	       (tlbehi & 0x200)?'1':'0',
+	       (tlbelo & 0x100)?'1':'0',
+	       (tlbehi & 0xff),
+	       (tlbehi >> 12), (tlbelo >> 12),
+	       (tlbelo >> 4) & 7, (tlbelo >> 2) & 3,
+	       (tlbelo & 0x200)?'1':'0',
+	       (tlbelo & 0x080)?'1':'0',
+	       (tlbelo & 0x001)?'1':'0',
+	       (tlbelo & 0x002)?'1':'0');
+
+	sysreg_write(MMUCR, mmucr_save);
+	sysreg_write(TLBEHI, tlbehi_save);
+	cpu_sync_pipeline();
+	local_irq_restore(flags);
+}
+
+/* Print a header line followed by data TLB entries 0 through 31 */
+void dump_dtlb(void)
+{
+	unsigned int entry = 0;
+
+	printk("ID V G ASID VPN PFN AP SZ C B W D\n");
+	while (entry < 32) {
+		show_dtlb_entry(entry);
+		entry++;
+	}
+}
+
+static unsigned long last_mmucr;
+/*
+ * Choose the TLB entry that the next tlbw will overwrite.
+ *
+ * A `tlbs' search is issued first and MMUCR is read back. If the
+ * search did not find the mapping (MMUCR_N set), an entry index is
+ * derived from TLBARLO as 32 - fls(TLBARLO) and stored into MMUCR at
+ * bit position `shift'; when TLBARLO is zero, index 0 is used and
+ * TLBARLO is reset to all ones. If the mapping was found, MMUCR is
+ * left as the search returned it. The resulting MMUCR value is
+ * remembered in last_mmucr.
+ *
+ * NOTE(review): presumably TLBEHI must already hold the address/ASID
+ * to search for when this is called -- update_dtlb() does so.
+ */
+static inline void set_replacement_pointer(unsigned shift)
+{
+ unsigned long mmucr, mmucr_save;
+
+ mmucr = mmucr_save = sysreg_read(MMUCR); /* mmucr_save is not used afterwards */
+
+ /* Does this mapping already exist? */
+ __asm__ __volatile__(
+ " tlbs\n"
+ " mfsr %0, %1"
+ : "=r"(mmucr)
+ : "i"(SYSREG_MMUCR));
+
+ if (mmucr & SYSREG_BIT(MMUCR_N)) {
+ /* Not found -- pick a not-recently-accessed entry */
+ unsigned long rp;
+ unsigned long tlbar = sysreg_read(TLBARLO);
+
+ rp = 32 - fls(tlbar);
+ if (rp == 32) { /* tlbar was 0: no bit set in TLBARLO */
+ rp = 0;
+ sysreg_write(TLBARLO, -1L);
+ }
+
+ mmucr &= 0x13;
+ mmucr |= (rp << shift);
+
+ sysreg_write(MMUCR, mmucr);
+ }
+
+ last_mmucr = mmucr;
+}
+/*
+ * Write a TLB entry for `address' under `asid'.
+ *
+ * TLBEHI is loaded with the virtual page number, the valid bit and
+ * the ASID; set_replacement_pointer() then selects the entry to use;
+ * TLBELO receives the hardware-relevant bits of `pte'; finally `tlbw'
+ * commits the entry. The only caller visible in this file,
+ * update_mmu_cache(), disables interrupts around the call.
+ */
+static void update_dtlb(unsigned long address, pte_t pte, unsigned long asid)
+{
+ unsigned long vpn;
+
+ vpn = (address & MMU_VPN_MASK) | _TLBEHI_VALID | asid;
+ sysreg_write(TLBEHI, vpn);
+ cpu_sync_pipeline();
+
+ set_replacement_pointer(14); /* 14: same MMUCR bit position used for the entry index elsewhere in this file */
+
+ sysreg_write(TLBELO, pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK);
+
+ /* Let's go */
+ asm volatile("nop\n\ttlbw" : : : "memory");
+ cpu_sync_pipeline();
+}
+
+/*
+ * Preload the TLB with the translation `pte' for `address', using the
+ * current ASID. Nothing is done when `vma' belongs to an address
+ * space other than the one currently active.
+ */
+void update_mmu_cache(struct vm_area_struct *vma,
+		      unsigned long address, pte_t pte)
+{
+	unsigned long irq_state;
+
+	/* ptrace may call this routine */
+	if (vma) {
+		if (vma->vm_mm != current->active_mm)
+			return;
+	}
+
+	local_irq_save(irq_state);
+	update_dtlb(address, pte, get_asid());
+	local_irq_restore(irq_state);
+}
+/*
+ * Invalidate the TLB entry, if any, that maps `page' under `asid'.
+ *
+ * TLBEHI is loaded with page | asid and a `tlbs' search is issued.
+ * When the search finds an entry (MMUCR_N clear), the valid bit is
+ * cleared in TLBEHI, the entry's bit is set in TLBARLO and the entry
+ * is written back with `tlbw'. Callers in this file run this with
+ * interrupts disabled and pass a page-aligned address.
+ */
+void __flush_tlb_page(unsigned long asid, unsigned long page)
+{
+ unsigned long mmucr, tlbehi;
+
+ page |= asid;
+ sysreg_write(TLBEHI, page);
+ cpu_sync_pipeline();
+ asm volatile("tlbs");
+ mmucr = sysreg_read(MMUCR);
+
+ if (!(mmucr & SYSREG_BIT(MMUCR_N))) {
+ unsigned long tlbarlo;
+ unsigned long entry;
+
+ /* Clear the "valid" bit */
+ tlbehi = sysreg_read(TLBEHI);
+ tlbehi &= ~_TLBEHI_VALID;
+ sysreg_write(TLBEHI, tlbehi);
+ cpu_sync_pipeline();
+
+ /* mark the entry as "not accessed" */
+ entry = (mmucr >> 14) & 0x3f; /* index of the entry the search found */
+ tlbarlo = sysreg_read(TLBARLO);
+ tlbarlo |= (0x80000000 >> entry);
+ sysreg_write(TLBARLO, tlbarlo);
+
+ /* update the entry with valid bit clear */
+ asm volatile("tlbw");
+ cpu_sync_pipeline();
+ }
+}
+
+/*
+ * Flush the TLB entry for `page' in the address space of `vma'.
+ * Nothing is done when the vma has no mm or the mm has no MMU context.
+ * When the mm is not the current one, the ASID is switched to the
+ * target's for the duration of the flush and restored afterwards.
+ */
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	unsigned long irq_state, asid;
+	unsigned long saved_asid = MMU_NO_ASID;
+
+	if (!vma->vm_mm)
+		return;
+	if (vma->vm_mm->context == NO_CONTEXT)
+		return;
+
+	asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK;
+	page &= PAGE_MASK;
+
+	local_irq_save(irq_state);
+
+	if (vma->vm_mm != current->mm) {
+		saved_asid = get_asid();
+		set_asid(asid);
+	}
+
+	__flush_tlb_page(asid, page);
+
+	if (saved_asid != MMU_NO_ASID)
+		set_asid(saved_asid);
+
+	local_irq_restore(irq_state);
+}
+/*
+ * Flush the TLB entries covering [start, end) in the address space of
+ * `vma'. Nothing is done when the mm has no MMU context.
+ *
+ * When the range spans more than MMU_DTLB_ENTRIES / 4 pages, the mm's
+ * context is dropped instead (and a new one activated if the mm is
+ * the current one). Otherwise each page is flushed individually,
+ * temporarily switching to the mm's ASID if it is not the current mm.
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (mm->context != NO_CONTEXT) {
+ unsigned long flags;
+ int size;
+
+ local_irq_save(flags);
+ size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; /* number of pages, rounded up */
+ if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */
+ mm->context = NO_CONTEXT;
+ if (mm == current->mm)
+ activate_context(mm);
+ } else {
+ unsigned long asid = mm->context & MMU_CONTEXT_ASID_MASK;
+ unsigned long saved_asid = MMU_NO_ASID;
+
+ start &= PAGE_MASK;
+ end += (PAGE_SIZE - 1);
+ end &= PAGE_MASK;
+ if (mm != current->mm) {
+ saved_asid = get_asid();
+ set_asid(asid);
+ }
+
+ while (start < end) {
+ __flush_tlb_page(asid, start);
+ start += PAGE_SIZE;
+ }
+ if (saved_asid != MMU_NO_ASID) /* only set when the ASID was switched above */
+ set_asid(saved_asid);
+ }
+ local_irq_restore(flags);
+ }
+}
+
+/*
+ * Flush the TLB entries covering the kernel virtual range
+ * [start, end). Ranges of more than MMU_DTLB_ENTRIES / 4 pages are
+ * handled by flushing the whole TLB; smaller ranges are flushed page
+ * by page under the ASID taken from init_mm.context, with the
+ * previous ASID restored afterwards.
+ *
+ * TODO: If this is only called for addresses > TASK_SIZE, we can probably
+ * skip the ASID stuff and just use the Global bit...
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long flags;
+ int size;
+
+ local_irq_save(flags);
+ size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; /* number of pages, rounded up */
+ if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */
+ flush_tlb_all();
+ } else {
+ unsigned long asid = init_mm.context & MMU_CONTEXT_ASID_MASK;
+ unsigned long saved_asid = get_asid();
+
+ start &= PAGE_MASK;
+ end += (PAGE_SIZE - 1);
+ end &= PAGE_MASK;
+ set_asid(asid);
+ while (start < end) {
+ __flush_tlb_page(asid, start);
+ start += PAGE_SIZE;
+ }
+ set_asid(saved_asid);
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * Flush all TLB entries belonging to `mm' by dropping its MMU context;
+ * if `mm' is the current address space a new context is activated
+ * right away. Nothing is done when `mm' has no context.
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long irq_state;
+
+	if (mm->context == NO_CONTEXT)
+		return;
+
+	/* Invalidate all TLB entries of this process by getting a new ASID */
+	local_irq_save(irq_state);
+
+	mm->context = NO_CONTEXT;
+	if (current->mm == mm)
+		activate_context(mm);
+
+	local_irq_restore(irq_state);
+}
+
+/* Flush the whole TLB by setting the MMUCR_I bit in MMUCR */
+void flush_tlb_all(void)
+{
+	unsigned long irq_state;
+	unsigned long mmucr;
+
+	local_irq_save(irq_state);
+
+	mmucr = sysreg_read(MMUCR);
+	mmucr |= SYSREG_BIT(MMUCR_I);
+	sysreg_write(MMUCR, mmucr);
+
+	local_irq_restore(irq_state);
+}
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+
+/*
+ * seq_file start callback: position the iterator at TLB entry `*pos'.
+ * Returns NULL once `*pos' is past the last entry (index 31).
+ *
+ * The iterator must resume at `*pos' rather than at 0: seq_file calls
+ * start() again with a non-zero position when a read continues after
+ * a short read or a seek, and restarting at 0 would repeat entries.
+ *
+ * NOTE(review): the index lives in a function-local static, so
+ * concurrent readers of the file share it -- confirm this is
+ * acceptable.
+ */
+static void *tlb_start(struct seq_file *tlb, loff_t *pos)
+{
+	static unsigned long tlb_index;
+
+	if (*pos >= 32)
+		return NULL;
+
+	tlb_index = *pos;
+	return &tlb_index;
+}
+
+/*
+ * seq_file next callback: advance both the entry index `v' and the
+ * file position. Returns NULL, leaving both untouched, once the last
+ * entry (index 31) has been reached.
+ */
+static void *tlb_next(struct seq_file *tlb, void *v, loff_t *pos)
+{
+	unsigned long *cursor = v;
+
+	if (*cursor < 31) {
+		++*pos;
+		++*cursor;
+		return cursor;
+	}
+
+	return NULL;
+}
+
+static void tlb_stop(struct seq_file *tlb, void *v)
+{
+ /* seq_file stop callback: intentionally empty, the body does nothing */
+}
+
+/*
+ * seq_file show callback: emit one line for the TLB entry whose index
+ * `v' points to, preceded by a header line when the index is 0.
+ *
+ * The entry is read with `tlbr' after selecting it through MMUCR;
+ * MMUCR and TLBEHI are restored before interrupts are re-enabled, and
+ * the line is formatted only after that. Always returns 0.
+ */
+static int tlb_show(struct seq_file *tlb, void *v)
+{
+	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save;
+	/* local_irq_save() requires an unsigned long */
+	unsigned long flags;
+	unsigned long *index = v;
+
+	if (*index == 0)
+		seq_puts(tlb, "ID V G ASID VPN PFN AP SZ C B W D\n");
+
+	BUG_ON(*index >= 32);
+
+	local_irq_save(flags);
+	mmucr_save = sysreg_read(MMUCR);
+	tlbehi_save = sysreg_read(TLBEHI);
+	mmucr = mmucr_save & 0x13;
+	mmucr |= *index << 14;
+	sysreg_write(MMUCR, mmucr);
+
+	asm volatile("tlbr" : : : "memory");
+	cpu_sync_pipeline();
+
+	tlbehi = sysreg_read(TLBEHI);
+	tlbelo = sysreg_read(TLBELO);
+
+	sysreg_write(MMUCR, mmucr_save);
+	sysreg_write(TLBEHI, tlbehi_save);
+	cpu_sync_pipeline();
+	local_irq_restore(flags);
+
+	seq_printf(tlb, "%2lu: %c %c %02x %05x %05x %o %o %c %c %c %c\n",
+		   *index,
+		   (tlbehi & 0x200)?'1':'0',
+		   (tlbelo & 0x100)?'1':'0',
+		   (tlbehi & 0xff),
+		   (tlbehi >> 12), (tlbelo >> 12),
+		   (tlbelo >> 4) & 7, (tlbelo >> 2) & 3,
+		   (tlbelo & 0x200)?'1':'0',
+		   (tlbelo & 0x080)?'1':'0',
+		   (tlbelo & 0x001)?'1':'0',
+		   (tlbelo & 0x002)?'1':'0');
+
+	return 0;
+}
+/*
+ * seq_file iterator used for the "tlb" proc entry: one record per TLB
+ * entry, produced by the tlb_start/tlb_next/tlb_stop/tlb_show
+ * callbacks.
+ */
+static struct seq_operations tlb_ops = {
+ .start = tlb_start,
+ .next = tlb_next,
+ .stop = tlb_stop,
+ .show = tlb_show,
+};
+
+static int tlb_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &tlb_ops); /* attach the iterator above to this open file */
+}
+
+static struct file_operations proc_tlb_operations = {
+ .open = tlb_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * Create the "tlb" proc entry and hook up its file operations. A
+ * failure to create the entry is ignored; 0 is returned either way.
+ */
+static int __init proctlb_init(void)
+{
+	struct proc_dir_entry *entry = create_proc_entry("tlb", 0, NULL);
+
+	if (!entry)
+		return 0;
+
+	entry->proc_fops = &proc_tlb_operations;
+	return 0;
+}
+late_initcall(proctlb_init);
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index a601a17cf568..f7b171b92ea2 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -27,7 +27,11 @@ config GENERIC_CALIBRATE_DELAY
config GENERIC_HARDIRQS
bool
- default n
+ default y
+
+config GENERIC_HARDIRQS_NO__DO_IRQ
+ bool
+ default y
config GENERIC_TIME
bool
@@ -251,6 +255,12 @@ config MB93091_NO_MB
endchoice
endif
+config FUJITSU_MB93493
+ bool "MB93493 Multimedia chip"
+ help
+ Select this option if the MB93493 multimedia chip is going to be
+ used.
+
choice
prompt "GP-Relative data support"
default GPREL_DATA_8
diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile
index 5a827b349b5e..32db3499c461 100644
--- a/arch/frv/kernel/Makefile
+++ b/arch/frv/kernel/Makefile
@@ -10,15 +10,14 @@ extra-y:= head.o init_task.o vmlinux.lds
obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
process.o traps.o ptrace.o signal.o dma.o \
sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
- debug-stub.o irq.o irq-routing.o sleep.o uaccess.o
+ debug-stub.o irq.o sleep.o uaccess.o
obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o
obj-$(CONFIG_MB93091_VDK) += irq-mb93091.o
-obj-$(CONFIG_MB93093_PDK) += irq-mb93093.o
-obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o
obj-$(CONFIG_PM) += pm.o cmode.o
obj-$(CONFIG_MB93093_PDK) += pm-mb93093.o
+obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o
obj-$(CONFIG_SYSCTL) += sysctl.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c
index 1381abcd5cc9..369bc0a7443d 100644
--- a/arch/frv/kernel/irq-mb93091.c
+++ b/arch/frv/kernel/irq-mb93091.c
@@ -24,7 +24,6 @@
#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/irc-regs.h>
-#include <asm/irq-routing.h>
#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR))
@@ -33,83 +32,131 @@
#define __get_IFR() ({ __reg16(0xffc0000c); })
#define __clr_IFR(M) do { __reg16(0xffc0000c) = ~(M); wmb(); } while(0)
-static void frv_fpga_doirq(struct irq_source *source);
-static void frv_fpga_control(struct irq_group *group, int irq, int on);
-/*****************************************************************************/
/*
- * FPGA IRQ multiplexor
+ * on-motherboard FPGA PIC operations
*/
-static struct irq_source frv_fpga[4] = {
-#define __FPGA(X, M) \
- [X] = { \
- .muxname = "fpga."#X, \
- .irqmask = M, \
- .doirq = frv_fpga_doirq, \
- }
+static void frv_fpga_mask(unsigned int irq)
+{
+ uint16_t imr = __get_IMR();
- __FPGA(0, 0x0028),
- __FPGA(1, 0x0050),
- __FPGA(2, 0x1c00),
- __FPGA(3, 0x6386),
-};
+ imr |= 1 << (irq - IRQ_BASE_FPGA);
-static struct irq_group frv_fpga_irqs = {
- .first_irq = IRQ_BASE_FPGA,
- .control = frv_fpga_control,
- .sources = {
- [ 1] = &frv_fpga[3],
- [ 2] = &frv_fpga[3],
- [ 3] = &frv_fpga[0],
- [ 4] = &frv_fpga[1],
- [ 5] = &frv_fpga[0],
- [ 6] = &frv_fpga[1],
- [ 7] = &frv_fpga[3],
- [ 8] = &frv_fpga[3],
- [ 9] = &frv_fpga[3],
- [10] = &frv_fpga[2],
- [11] = &frv_fpga[2],
- [12] = &frv_fpga[2],
- [13] = &frv_fpga[3],
- [14] = &frv_fpga[3],
- },
-};
+ __set_IMR(imr);
+}
+/*
+ * Acknowledge an FPGA PIC interrupt: pass the bit for this IRQ (its offset
+ * from IRQ_BASE_FPGA) to __clr_IFR(), which writes the complement of that
+ * mask to the IFR register.
+ */
+static void frv_fpga_ack(unsigned int irq)
+{
+ unsigned int bit = irq - IRQ_BASE_FPGA;
+
+ __clr_IFR(1 << bit);
+}
-static void frv_fpga_control(struct irq_group *group, int index, int on)
+static void frv_fpga_mask_ack(unsigned int irq)
{
uint16_t imr = __get_IMR();
- if (on)
- imr &= ~(1 << index);
- else
- imr |= 1 << index;
+ imr |= 1 << (irq - IRQ_BASE_FPGA);
+ __set_IMR(imr);
+
+ __clr_IFR(1 << (irq - IRQ_BASE_FPGA));
+}
+
+static void frv_fpga_unmask(unsigned int irq)
+{
+ uint16_t imr = __get_IMR();
+
+ imr &= ~(1 << (irq - IRQ_BASE_FPGA));
__set_IMR(imr);
}
-static void frv_fpga_doirq(struct irq_source *source)
+static struct irq_chip frv_fpga_pic = {
+ .name = "mb93091",
+ .ack = frv_fpga_ack,
+ .mask = frv_fpga_mask,
+ .mask_ack = frv_fpga_mask_ack,
+ .unmask = frv_fpga_unmask,
+};
+
+/*
+ * FPGA PIC interrupt handler
+ */
+static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs)
{
- uint16_t mask, imr;
+ uint16_t imr, mask = (unsigned long) _mask;
imr = __get_IMR();
- mask = source->irqmask & ~imr & __get_IFR();
- if (mask) {
- __set_IMR(imr | mask);
- __clr_IFR(mask);
- distribute_irqs(&frv_fpga_irqs, mask);
- __set_IMR(imr);
+ mask = mask & ~imr & __get_IFR();
+
+ /* poll all the triggered IRQs */
+ while (mask) {
+ int irq;
+
+ asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask));
+ irq = 31 - irq;
+ mask &= ~(1 << irq);
+
+ generic_handle_irq(IRQ_BASE_FPGA + irq, regs);
}
+
+ return IRQ_HANDLED;
}
+/*
+ * define an interrupt action for each FPGA PIC output
+ * - use dev_id to indicate the FPGA PIC input to output mappings
+ */
+static struct irqaction fpga_irq[4] = {
+ [0] = {
+ .handler = fpga_interrupt,
+ .flags = IRQF_DISABLED | IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "fpga.0",
+ .dev_id = (void *) 0x0028UL,
+ },
+ [1] = {
+ .handler = fpga_interrupt,
+ .flags = IRQF_DISABLED | IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "fpga.1",
+ .dev_id = (void *) 0x0050UL,
+ },
+ [2] = {
+ .handler = fpga_interrupt,
+ .flags = IRQF_DISABLED | IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "fpga.2",
+ .dev_id = (void *) 0x1c00UL,
+ },
+ [3] = {
+ .handler = fpga_interrupt,
+ .flags = IRQF_DISABLED | IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "fpga.3",
+ .dev_id = (void *) 0x6386UL,
+ }
+};
+
+/*
+ * initialise the motherboard FPGA's PIC
+ */
void __init fpga_init(void)
{
+ int irq;
+
+ /* all PIC inputs are all set to be low-level driven, apart from the
+ * NMI button (15) which is fixed at falling-edge
+ */
__set_IMR(0x7ffe);
__clr_IFR(0x0000);
- frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL0);
- frv_irq_route_external(&frv_fpga[1], IRQ_CPU_EXTERNAL1);
- frv_irq_route_external(&frv_fpga[2], IRQ_CPU_EXTERNAL2);
- frv_irq_route_external(&frv_fpga[3], IRQ_CPU_EXTERNAL3);
- frv_irq_set_group(&frv_fpga_irqs);
+ for (irq = IRQ_BASE_FPGA + 1; irq <= IRQ_BASE_FPGA + 14; irq++)
+ set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_level_irq);
+
+ set_irq_chip_and_handler(IRQ_FPGA_NMI, &frv_fpga_pic, handle_edge_irq);
+
+ /* the FPGA drives the first four external IRQ inputs on the CPU PIC */
+ setup_irq(IRQ_CPU_EXTERNAL0, &fpga_irq[0]);
+ setup_irq(IRQ_CPU_EXTERNAL1, &fpga_irq[1]);
+ setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[2]);
+ setup_irq(IRQ_CPU_EXTERNAL3, &fpga_irq[3]);
}
diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c
index 48b2a6420888..a43a22158956 100644
--- a/arch/frv/kernel/irq-mb93093.c
+++ b/arch/frv/kernel/irq-mb93093.c
@@ -1,6 +1,6 @@
/* irq-mb93093.c: MB93093 FPGA interrupt handling
*
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/irc-regs.h>
-#include <asm/irq-routing.h>
#define __reg16(ADDR) (*(volatile unsigned short *)(__region_CS2 + (ADDR)))
@@ -33,66 +32,102 @@
#define __get_IFR() ({ __reg16(0x02); })
#define __clr_IFR(M) do { __reg16(0x02) = ~(M); wmb(); } while(0)
-static void frv_fpga_doirq(struct irq_source *source);
-static void frv_fpga_control(struct irq_group *group, int irq, int on);
-
-/*****************************************************************************/
/*
- * FPGA IRQ multiplexor
+ * off-CPU FPGA PIC operations
*/
-static struct irq_source frv_fpga[4] = {
-#define __FPGA(X, M) \
- [X] = { \
- .muxname = "fpga."#X, \
- .irqmask = M, \
- .doirq = frv_fpga_doirq, \
- }
+static void frv_fpga_mask(unsigned int irq)
+{
+ uint16_t imr = __get_IMR();
- __FPGA(0, 0x0700),
-};
+ imr |= 1 << (irq - IRQ_BASE_FPGA);
+ __set_IMR(imr);
+}
-static struct irq_group frv_fpga_irqs = {
- .first_irq = IRQ_BASE_FPGA,
- .control = frv_fpga_control,
- .sources = {
- [ 8] = &frv_fpga[0],
- [ 9] = &frv_fpga[0],
- [10] = &frv_fpga[0],
- },
-};
+/*
+ * Acknowledge an FPGA PIC interrupt: pass the bit for this IRQ (its offset
+ * from IRQ_BASE_FPGA) to __clr_IFR(), which writes the complement of that
+ * mask to the IFR register.
+ */
+static void frv_fpga_ack(unsigned int irq)
+{
+ unsigned int bit = irq - IRQ_BASE_FPGA;
+
+ __clr_IFR(1 << bit);
+}
+
+/*
+ * Mask and acknowledge an FPGA PIC interrupt in one go: set the IRQ's bit in
+ * the IMR, then hand the same bit to __clr_IFR().
+ */
+static void frv_fpga_mask_ack(unsigned int irq)
+{
+ unsigned int bit = irq - IRQ_BASE_FPGA;
+ uint16_t imr;
+
+ /* mask first ... */
+ imr = __get_IMR();
+ imr |= 1 << bit;
+ __set_IMR(imr);
+
+ /* ... then ack */
+ __clr_IFR(1 << bit);
+}
-static void frv_fpga_control(struct irq_group *group, int index, int on)
+static void frv_fpga_unmask(unsigned int irq)
{
uint16_t imr = __get_IMR();
- if (on)
- imr &= ~(1 << index);
- else
- imr |= 1 << index;
+ imr &= ~(1 << (irq - IRQ_BASE_FPGA));
__set_IMR(imr);
}
-static void frv_fpga_doirq(struct irq_source *source)
+/*
+ * irq_chip describing the MB93093 FPGA PIC. No ->end handler is installed:
+ * this file defines no frv_fpga_end(), which matches the MB93091 variant of
+ * this chip.
+ */
+static struct irq_chip frv_fpga_pic = {
+ .name = "mb93093",
+ .ack = frv_fpga_ack,
+ .mask = frv_fpga_mask,
+ .mask_ack = frv_fpga_mask_ack,
+ .unmask = frv_fpga_unmask,
+};
+
+/*
+ * FPGA PIC interrupt handler
+ * - _mask is the irqaction's dev_id: a bit mask (cast to a pointer) of the
+ *   FPGA PIC inputs that feed the CPU IRQ this action is attached to
+ * - every input in that mask that is flagged in the IFR and not masked in the
+ *   IMR is passed to generic_handle_irq() as IRQ_BASE_FPGA + bit number
+ * - always returns IRQ_HANDLED
+ */
+static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs)
{
- uint16_t mask, imr;
+ uint16_t imr, mask = (unsigned long) _mask;
imr = __get_IMR();
- mask = source->irqmask & ~imr & __get_IFR();
- if (mask) {
- __set_IMR(imr | mask);
- __clr_IFR(mask);
- distribute_irqs(&frv_fpga_irqs, mask);
- __set_IMR(imr);
+ mask = mask & ~imr & __get_IFR();
+
+ /* poll all the triggered IRQs */
+ while (mask) {
+ int irq;
+
+ /* presumably "scan" counts from the top bit, hence 31 - irq */
+ asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask));
+ irq = 31 - irq;
+ mask &= ~(1 << irq);
+
+ generic_handle_irq(IRQ_BASE_FPGA + irq, regs);
}
+
+ return IRQ_HANDLED;
}
+/*
+ * define an interrupt action for each FPGA PIC output
+ * - use dev_id to indicate the FPGA PIC input to output mappings
+ */
+static struct irqaction fpga_irq[1] = {
+ [0] = {
+ .handler = fpga_interrupt,
+ .flags = IRQF_DISABLED,
+ .mask = CPU_MASK_NONE,
+ .name = "fpga.0",
+ .dev_id = (void *) 0x0700UL,
+ }
+};
+
+/*
+ * initialise the motherboard FPGA's PIC
+ */
void __init fpga_init(void)
{
+ int irq;
+
+ /* all PIC inputs are all set to be edge triggered */
__set_IMR(0x0700);
__clr_IFR(0x0000);
- frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL2);
- frv_irq_set_group(&frv_fpga_irqs);
+ for (irq = IRQ_BASE_FPGA + 8; irq <= IRQ_BASE_FPGA + 10; irq++)
+ set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_edge_irq);
+
+ /* the FPGA drives external IRQ input #2 on the CPU PIC */
+ setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[0]);
}
diff --git a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c
index 988d035640e1..39c0188a3498 100644
--- a/arch/frv/kernel/irq-mb93493.c
+++ b/arch/frv/kernel/irq-mb93493.c
@@ -1,6 +1,6 @@
/* irq-mb93493.c: MB93493 companion chip interrupt handler
*
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -24,84 +24,126 @@
#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/irc-regs.h>
-#include <asm/irq-routing.h>
#include <asm/mb93493-irqs.h>
+#include <asm/mb93493-regs.h>
-static void frv_mb93493_doirq(struct irq_source *source);
+#define IRQ_ROUTE_ONE(X) (X##_ROUTE << (X - IRQ_BASE_MB93493))
+
+#define IRQ_ROUTING \
+ (IRQ_ROUTE_ONE(IRQ_MB93493_VDC) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_VCC) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_OUT) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_I2C_0) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_I2C_1) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_USB) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_LOCAL_BUS) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_PCMCIA) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_GPIO) | \
+ IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_IN))
-/*****************************************************************************/
/*
- * MB93493 companion chip IRQ multiplexor
+ * daughter board PIC operations
+ * - there is no way to ACK interrupts in the MB93493 chip
*/
-static struct irq_source frv_mb93493[2] = {
- [0] = {
- .muxname = "mb93493.0",
- .muxdata = __region_CS3 + 0x3d0,
- .doirq = frv_mb93493_doirq,
- .irqmask = 0x0000,
- },
- [1] = {
- .muxname = "mb93493.1",
- .muxdata = __region_CS3 + 0x3d4,
- .doirq = frv_mb93493_doirq,
- .irqmask = 0x0000,
- },
-};
-
-static void frv_mb93493_control(struct irq_group *group, int index, int on)
+static void frv_mb93493_mask(unsigned int irq)
{
- struct irq_source *source;
uint32_t iqsr;
+ volatile void *piqsr;
- if ((frv_mb93493[0].irqmask & (1 << index)))
- source = &frv_mb93493[0];
+ if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493)))
+ piqsr = __addr_MB93493_IQSR(1);
else
- source = &frv_mb93493[1];
+ piqsr = __addr_MB93493_IQSR(0);
+
+ iqsr = readl(piqsr);
+ iqsr &= ~(1 << (irq - IRQ_BASE_MB93493 + 16));
+ writel(iqsr, piqsr);
+}
- iqsr = readl(source->muxdata);
- if (on)
- iqsr |= 1 << (index + 16);
+/* ->ack for the MB93493 PIC: deliberately a no-op */
+static void frv_mb93493_ack(unsigned int irq)
+{
+ /* nothing to do */
+}
+
+static void frv_mb93493_unmask(unsigned int irq)
+{
+ uint32_t iqsr;
+ volatile void *piqsr;
+
+ if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493)))
+ piqsr = __addr_MB93493_IQSR(1);
else
- iqsr &= ~(1 << (index + 16));
+ piqsr = __addr_MB93493_IQSR(0);
- writel(iqsr, source->muxdata);
+ iqsr = readl(piqsr);
+ iqsr |= 1 << (irq - IRQ_BASE_MB93493 + 16);
+ writel(iqsr, piqsr);
}
-static struct irq_group frv_mb93493_irqs = {
- .first_irq = IRQ_BASE_MB93493,
- .control = frv_mb93493_control,
+/*
+ * irq_chip describing the MB93493 PIC. ->ack is an empty function, so
+ * ->mask_ack simply reuses the mask operation.
+ */
+static struct irq_chip frv_mb93493_pic = {
+ .name = "mb93493",
+ .ack = frv_mb93493_ack,
+ .mask = frv_mb93493_mask,
+ .mask_ack = frv_mb93493_mask,
+ .unmask = frv_mb93493_unmask,
};
-static void frv_mb93493_doirq(struct irq_source *source)
+/*
+ * MB93493 PIC interrupt handler
+ */
+static irqreturn_t mb93493_interrupt(int irq, void *_piqsr, struct pt_regs *regs)
{
- uint32_t mask = readl(source->muxdata);
- mask = mask & (mask >> 16) & 0xffff;
+ volatile void *piqsr = _piqsr;
+ uint32_t iqsr;
- if (mask)
- distribute_irqs(&frv_mb93493_irqs, mask);
-}
+ iqsr = readl(piqsr);
+ iqsr = iqsr & (iqsr >> 16) & 0xffff;
-static void __init mb93493_irq_route(int irq, int source)
-{
- frv_mb93493[source].irqmask |= 1 << (irq - IRQ_BASE_MB93493);
- frv_mb93493_irqs.sources[irq - IRQ_BASE_MB93493] = &frv_mb93493[source];
+ /* poll all the triggered IRQs */
+ while (iqsr) {
+ int irq;
+
+ asm("scan %1,gr0,%0" : "=r"(irq) : "r"(iqsr));
+ irq = 31 - irq;
+ iqsr &= ~(1 << irq);
+
+ generic_handle_irq(IRQ_BASE_MB93493 + irq, regs);
+ }
+
+ return IRQ_HANDLED;
}
-void __init route_mb93493_irqs(void)
+/*
+ * define an interrupt action for each MB93493 PIC output
+ * - use dev_id to indicate the MB93493 PIC input to output mappings
+ */
+static struct irqaction mb93493_irq[2] = {
+ [0] = {
+ .handler = mb93493_interrupt,
+ .flags = IRQF_DISABLED | IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "mb93493.0",
+ .dev_id = (void *) __addr_MB93493_IQSR(0),
+ },
+ [1] = {
+ .handler = mb93493_interrupt,
+ .flags = IRQF_DISABLED | IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "mb93493.1",
+ .dev_id = (void *) __addr_MB93493_IQSR(1),
+ }
+};
+
+/*
+ * initialise the motherboard MB93493's PIC
+ */
+void __init mb93493_init(void)
{
- frv_irq_route_external(&frv_mb93493[0], IRQ_CPU_MB93493_0);
- frv_irq_route_external(&frv_mb93493[1], IRQ_CPU_MB93493_1);
-
- frv_irq_set_group(&frv_mb93493_irqs);
-
- mb93493_irq_route(IRQ_MB93493_VDC, IRQ_MB93493_VDC_ROUTE);
- mb93493_irq_route(IRQ_MB93493_VCC, IRQ_MB93493_VCC_ROUTE);
- mb93493_irq_route(IRQ_MB93493_AUDIO_IN, IRQ_MB93493_AUDIO_IN_ROUTE);
- mb93493_irq_route(IRQ_MB93493_I2C_0, IRQ_MB93493_I2C_0_ROUTE);
- mb93493_irq_route(IRQ_MB93493_I2C_1, IRQ_MB93493_I2C_1_ROUTE);
- mb93493_irq_route(IRQ_MB93493_USB, IRQ_MB93493_USB_ROUTE);
- mb93493_irq_route(IRQ_MB93493_LOCAL_BUS, IRQ_MB93493_LOCAL_BUS_ROUTE);
- mb93493_irq_route(IRQ_MB93493_PCMCIA, IRQ_MB93493_PCMCIA_ROUTE);
- mb93493_irq_route(IRQ_MB93493_GPIO, IRQ_MB93493_GPIO_ROUTE);
- mb93493_irq_route(IRQ_MB93493_AUDIO_OUT, IRQ_MB93493_AUDIO_OUT_ROUTE);
+ int irq;
+
+ for (irq = IRQ_BASE_MB93493 + 0; irq <= IRQ_BASE_MB93493 + 10; irq++)
+ set_irq_chip_and_handler(irq, &frv_mb93493_pic, handle_edge_irq);
+
+ /* the MB93493 drives external IRQ inputs on the CPU PIC */
+ setup_irq(IRQ_CPU_MB93493_0, &mb93493_irq[0]);
+ setup_irq(IRQ_CPU_MB93493_1, &mb93493_irq[1]);
}
diff --git a/arch/frv/kernel/irq-routing.c b/arch/frv/kernel/irq-routing.c
deleted file mode 100644
index 53886adf47de..000000000000
--- a/arch/frv/kernel/irq-routing.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/* irq-routing.c: IRQ routing
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/serial_reg.h>
-#include <asm/io.h>
-#include <asm/irq-routing.h>
-#include <asm/irc-regs.h>
-#include <asm/serial-regs.h>
-#include <asm/dma.h>
-
-struct irq_level frv_irq_levels[16] = {
- [0 ... 15] = {
- .lock = SPIN_LOCK_UNLOCKED,
- }
-};
-
-struct irq_group *irq_groups[NR_IRQ_GROUPS];
-
-extern struct irq_group frv_cpu_irqs;
-
-void __init frv_irq_route(struct irq_source *source, int irqlevel)
-{
- source->level = &frv_irq_levels[irqlevel];
- source->next = frv_irq_levels[irqlevel].sources;
- frv_irq_levels[irqlevel].sources = source;
-}
-
-void __init frv_irq_route_external(struct irq_source *source, int irq)
-{
- int irqlevel = 0;
-
- switch (irq) {
- case IRQ_CPU_EXTERNAL0: irqlevel = IRQ_XIRQ0_LEVEL; break;
- case IRQ_CPU_EXTERNAL1: irqlevel = IRQ_XIRQ1_LEVEL; break;
- case IRQ_CPU_EXTERNAL2: irqlevel = IRQ_XIRQ2_LEVEL; break;
- case IRQ_CPU_EXTERNAL3: irqlevel = IRQ_XIRQ3_LEVEL; break;
- case IRQ_CPU_EXTERNAL4: irqlevel = IRQ_XIRQ4_LEVEL; break;
- case IRQ_CPU_EXTERNAL5: irqlevel = IRQ_XIRQ5_LEVEL; break;
- case IRQ_CPU_EXTERNAL6: irqlevel = IRQ_XIRQ6_LEVEL; break;
- case IRQ_CPU_EXTERNAL7: irqlevel = IRQ_XIRQ7_LEVEL; break;
- default: BUG();
- }
-
- source->level = &frv_irq_levels[irqlevel];
- source->next = frv_irq_levels[irqlevel].sources;
- frv_irq_levels[irqlevel].sources = source;
-}
-
-void __init frv_irq_set_group(struct irq_group *group)
-{
- irq_groups[group->first_irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP] = group;
-}
-
-void distribute_irqs(struct irq_group *group, unsigned long irqmask)
-{
- struct irqaction *action;
- int irq;
-
- while (irqmask) {
- asm("scan %1,gr0,%0" : "=r"(irq) : "r"(irqmask));
- if (irq < 0 || irq > 31)
- asm volatile("break");
- irq = 31 - irq;
-
- irqmask &= ~(1 << irq);
- action = group->actions[irq];
-
- irq += group->first_irq;
-
- if (action) {
- int status = 0;
-
-// if (!(action->flags & IRQF_DISABLED))
-// local_irq_enable();
-
- do {
- status |= action->flags;
- action->handler(irq, action->dev_id, __frame);
- action = action->next;
- } while (action);
-
- if (status & IRQF_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
- local_irq_disable();
- }
- }
-}
-
-/*****************************************************************************/
-/*
- * CPU UART interrupts
- */
-static void frv_cpuuart_doirq(struct irq_source *source)
-{
-// uint8_t iir = readb(source->muxdata + UART_IIR * 8);
-// if ((iir & 0x0f) != UART_IIR_NO_INT)
- distribute_irqs(&frv_cpu_irqs, source->irqmask);
-}
-
-struct irq_source frv_cpuuart[2] = {
-#define __CPUUART(X, A) \
- [X] = { \
- .muxname = "uart", \
- .muxdata = (volatile void __iomem *)(unsigned long)A,\
- .irqmask = 1 << IRQ_CPU_UART##X, \
- .doirq = frv_cpuuart_doirq, \
- }
-
- __CPUUART(0, UART0_BASE),
- __CPUUART(1, UART1_BASE),
-};
-
-/*****************************************************************************/
-/*
- * CPU DMA interrupts
- */
-static void frv_cpudma_doirq(struct irq_source *source)
-{
- uint32_t cstr = readl(source->muxdata + DMAC_CSTRx);
- if (cstr & DMAC_CSTRx_INT)
- distribute_irqs(&frv_cpu_irqs, source->irqmask);
-}
-
-struct irq_source frv_cpudma[8] = {
-#define __CPUDMA(X, A) \
- [X] = { \
- .muxname = "dma", \
- .muxdata = (volatile void __iomem *)(unsigned long)A,\
- .irqmask = 1 << IRQ_CPU_DMA##X, \
- .doirq = frv_cpudma_doirq, \
- }
-
- __CPUDMA(0, 0xfe000900),
- __CPUDMA(1, 0xfe000980),
- __CPUDMA(2, 0xfe000a00),
- __CPUDMA(3, 0xfe000a80),
- __CPUDMA(4, 0xfe001000),
- __CPUDMA(5, 0xfe001080),
- __CPUDMA(6, 0xfe001100),
- __CPUDMA(7, 0xfe001180),
-};
-
-/*****************************************************************************/
-/*
- * CPU timer interrupts - can't tell whether they've generated an interrupt or not
- */
-static void frv_cputimer_doirq(struct irq_source *source)
-{
- distribute_irqs(&frv_cpu_irqs, source->irqmask);
-}
-
-struct irq_source frv_cputimer[3] = {
-#define __CPUTIMER(X) \
- [X] = { \
- .muxname = "timer", \
- .muxdata = NULL, \
- .irqmask = 1 << IRQ_CPU_TIMER##X, \
- .doirq = frv_cputimer_doirq, \
- }
-
- __CPUTIMER(0),
- __CPUTIMER(1),
- __CPUTIMER(2),
-};
-
-/*****************************************************************************/
-/*
- * external CPU interrupts - can't tell directly whether they've generated an interrupt or not
- */
-static void frv_cpuexternal_doirq(struct irq_source *source)
-{
- distribute_irqs(&frv_cpu_irqs, source->irqmask);
-}
-
-struct irq_source frv_cpuexternal[8] = {
-#define __CPUEXTERNAL(X) \
- [X] = { \
- .muxname = "ext", \
- .muxdata = NULL, \
- .irqmask = 1 << IRQ_CPU_EXTERNAL##X, \
- .doirq = frv_cpuexternal_doirq, \
- }
-
- __CPUEXTERNAL(0),
- __CPUEXTERNAL(1),
- __CPUEXTERNAL(2),
- __CPUEXTERNAL(3),
- __CPUEXTERNAL(4),
- __CPUEXTERNAL(5),
- __CPUEXTERNAL(6),
- __CPUEXTERNAL(7),
-};
-
-#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16))
-
-struct irq_group frv_cpu_irqs = {
- .sources = {
- [IRQ_CPU_UART0] = &frv_cpuuart[0],
- [IRQ_CPU_UART1] = &frv_cpuuart[1],
- [IRQ_CPU_TIMER0] = &frv_cputimer[0],
- [IRQ_CPU_TIMER1] = &frv_cputimer[1],
- [IRQ_CPU_TIMER2] = &frv_cputimer[2],
- [IRQ_CPU_DMA0] = &frv_cpudma[0],
- [IRQ_CPU_DMA1] = &frv_cpudma[1],
- [IRQ_CPU_DMA2] = &frv_cpudma[2],
- [IRQ_CPU_DMA3] = &frv_cpudma[3],
- [IRQ_CPU_DMA4] = &frv_cpudma[4],
- [IRQ_CPU_DMA5] = &frv_cpudma[5],
- [IRQ_CPU_DMA6] = &frv_cpudma[6],
- [IRQ_CPU_DMA7] = &frv_cpudma[7],
- [IRQ_CPU_EXTERNAL0] = &frv_cpuexternal[0],
- [IRQ_CPU_EXTERNAL1] = &frv_cpuexternal[1],
- [IRQ_CPU_EXTERNAL2] = &frv_cpuexternal[2],
- [IRQ_CPU_EXTERNAL3] = &frv_cpuexternal[3],
- [IRQ_CPU_EXTERNAL4] = &frv_cpuexternal[4],
- [IRQ_CPU_EXTERNAL5] = &frv_cpuexternal[5],
- [IRQ_CPU_EXTERNAL6] = &frv_cpuexternal[6],
- [IRQ_CPU_EXTERNAL7] = &frv_cpuexternal[7],
- },
-};
-
-/*****************************************************************************/
-/*
- * route the CPU's interrupt sources
- */
-void __init route_cpu_irqs(void)
-{
- frv_irq_set_group(&frv_cpu_irqs);
-
- __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 IRQ detect levels */
- __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 IRQ detect levels */
-
- /* route UART and error interrupts */
- frv_irq_route(&frv_cpuuart[0], IRQ_UART0_LEVEL);
- frv_irq_route(&frv_cpuuart[1], IRQ_UART1_LEVEL);
-
- set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL, IRQ_UART1_LEVEL, IRQ_UART0_LEVEL);
-
- /* route DMA channel interrupts */
- frv_irq_route(&frv_cpudma[0], IRQ_DMA0_LEVEL);
- frv_irq_route(&frv_cpudma[1], IRQ_DMA1_LEVEL);
- frv_irq_route(&frv_cpudma[2], IRQ_DMA2_LEVEL);
- frv_irq_route(&frv_cpudma[3], IRQ_DMA3_LEVEL);
- frv_irq_route(&frv_cpudma[4], IRQ_DMA4_LEVEL);
- frv_irq_route(&frv_cpudma[5], IRQ_DMA5_LEVEL);
- frv_irq_route(&frv_cpudma[6], IRQ_DMA6_LEVEL);
- frv_irq_route(&frv_cpudma[7], IRQ_DMA7_LEVEL);
-
- set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL, IRQ_DMA0_LEVEL);
- set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL, IRQ_DMA4_LEVEL);
-
- /* route timer interrupts */
- frv_irq_route(&frv_cputimer[0], IRQ_TIMER0_LEVEL);
- frv_irq_route(&frv_cputimer[1], IRQ_TIMER1_LEVEL);
- frv_irq_route(&frv_cputimer[2], IRQ_TIMER2_LEVEL);
-
- set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL);
-
- /* route external interrupts */
- frv_irq_route(&frv_cpuexternal[0], IRQ_XIRQ0_LEVEL);
- frv_irq_route(&frv_cpuexternal[1], IRQ_XIRQ1_LEVEL);
- frv_irq_route(&frv_cpuexternal[2], IRQ_XIRQ2_LEVEL);
- frv_irq_route(&frv_cpuexternal[3], IRQ_XIRQ3_LEVEL);
- frv_irq_route(&frv_cpuexternal[4], IRQ_XIRQ4_LEVEL);
- frv_irq_route(&frv_cpuexternal[5], IRQ_XIRQ5_LEVEL);
- frv_irq_route(&frv_cpuexternal[6], IRQ_XIRQ6_LEVEL);
- frv_irq_route(&frv_cpuexternal[7], IRQ_XIRQ7_LEVEL);
-
- set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL, IRQ_XIRQ4_LEVEL);
- set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL, IRQ_XIRQ0_LEVEL);
-
-#if defined(CONFIG_MB93091_VDK)
- __set_TM1(0x55550000); /* XIRQ7-0 all active low */
-#elif defined(CONFIG_MB93093_PDK)
- __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */
-#else
-#error dont know external IRQ trigger levels for this setup
-#endif
-
-} /* end route_cpu_irqs() */
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index 08967010be04..5ac041c7c0a4 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -1,6 +1,6 @@
/* irq.c: FRV IRQ handling
*
- * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -9,13 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
-/*
- * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
- *
- * IRQs are in fact implemented a bit like signal handlers for the kernel.
- * Naturally it's not a 1:1 relation, but there are similarities.
- */
-
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/signal.h>
@@ -43,19 +36,16 @@
#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/irc-regs.h>
-#include <asm/irq-routing.h>
#include <asm/gdb-stub.h>
-extern void __init fpga_init(void);
-extern void __init route_mb93493_irqs(void);
-
-static void register_irq_proc (unsigned int irq);
+#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16))
-/*
- * Special irq handlers.
- */
+extern void __init fpga_init(void);
+#ifdef CONFIG_FUJITSU_MB93493
+extern void __init mb93493_init(void);
+#endif
-irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) { return IRQ_HANDLED; }
+#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR))
atomic_t irq_err_count;
@@ -64,215 +54,86 @@ atomic_t irq_err_count;
*/
int show_interrupts(struct seq_file *p, void *v)
{
- struct irqaction *action;
- struct irq_group *group;
+ int i = *(loff_t *) v, cpu;
+ struct irqaction * action;
unsigned long flags;
- int level, grp, ix, i, j;
-
- i = *(loff_t *) v;
-
- switch (i) {
- case 0:
- seq_printf(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
-
- seq_putc(p, '\n');
- break;
- case 1 ... NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP:
- local_irq_save(flags);
-
- grp = (i - 1) / NR_IRQ_ACTIONS_PER_GROUP;
- group = irq_groups[grp];
- if (!group)
- goto skip;
-
- ix = (i - 1) % NR_IRQ_ACTIONS_PER_GROUP;
- action = group->actions[ix];
- if (!action)
- goto skip;
-
- seq_printf(p, "%3d: ", i - 1);
-
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
-#endif
-
- level = group->sources[ix]->level - frv_irq_levels;
-
- seq_printf(p, " %12s@%x", group->sources[ix]->muxname, level);
- seq_printf(p, " %s", action->name);
-
- for (action = action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
+ if (i == 0) {
+ char cpuname[12];
+ seq_printf(p, " ");
+ for_each_present_cpu(cpu) {
+ sprintf(cpuname, "CPU%d", cpu);
+ seq_printf(p, " %10s", cpuname);
+ }
seq_putc(p, '\n');
-skip:
- local_irq_restore(flags);
- break;
+ }
- case NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP + 1:
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
- break;
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (action) {
+ seq_printf(p, "%3d: ", i);
+ for_each_present_cpu(cpu)
+ seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+ seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-");
+ seq_printf(p, " %s", action->name);
+ for (action = action->next;
+ action;
+ action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+ }
- default:
- break;
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count));
}
return 0;
}
-
/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock.
+ * on-CPU PIC operations
*/
-
-/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Disables and Enables are
- * nested.
- * Unlike disable_irq(), this function does not ensure existing
- * instances of the IRQ handler have completed before returning.
- *
- * This function may be called from IRQ context.
- */
-
-void disable_irq_nosync(unsigned int irq)
+static void frv_cpupic_ack(unsigned int irqlevel)
{
- struct irq_source *source;
- struct irq_group *group;
- struct irq_level *level;
- unsigned long flags;
- int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1);
-
- group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
- if (!group)
- BUG();
-
- source = group->sources[idx];
- if (!source)
- BUG();
-
- level = source->level;
-
- spin_lock_irqsave(&level->lock, flags);
-
- if (group->control) {
- if (!group->disable_cnt[idx]++)
- group->control(group, idx, 0);
- } else if (!level->disable_count++) {
- __set_MASK(level - frv_irq_levels);
- }
-
- spin_unlock_irqrestore(&level->lock, flags);
+ __clr_RC(irqlevel);
+ __clr_IRL();
}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and Disables are
- * nested.
- * This function waits for any pending IRQ handlers for this interrupt
- * to complete before returning. If you use this function while
- * holding a resource the IRQ handler may need you will deadlock.
- *
- * This function may be called - with care - from IRQ context.
- */
-
-void disable_irq(unsigned int irq)
+static void frv_cpupic_mask(unsigned int irqlevel)
{
- disable_irq_nosync(irq);
-
-#ifdef CONFIG_SMP
- if (!local_irq_count(smp_processor_id())) {
- do {
- barrier();
- } while (irq_desc[irq].status & IRQ_INPROGRESS);
- }
-#endif
+ __set_MASK(irqlevel);
}
-EXPORT_SYMBOL(disable_irq);
-
-/**
- * enable_irq - enable handling of an irq
- * @irq: Interrupt to enable
- *
- * Undoes the effect of one call to disable_irq(). If this
- * matches the last disable, processing of interrupts on this
- * IRQ line is re-enabled.
- *
- * This function may be called from IRQ context.
- */
-
-void enable_irq(unsigned int irq)
+static void frv_cpupic_mask_ack(unsigned int irqlevel)
{
- struct irq_source *source;
- struct irq_group *group;
- struct irq_level *level;
- unsigned long flags;
- int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1);
- int count;
-
- group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
- if (!group)
- BUG();
-
- source = group->sources[idx];
- if (!source)
- BUG();
-
- level = source->level;
-
- spin_lock_irqsave(&level->lock, flags);
-
- if (group->control)
- count = group->disable_cnt[idx];
- else
- count = level->disable_count;
-
- switch (count) {
- case 1:
- if (group->control) {
- if (group->actions[idx])
- group->control(group, idx, 1);
- } else {
- if (level->usage)
- __clr_MASK(level - frv_irq_levels);
- }
- /* fall-through */
-
- default:
- count--;
- break;
-
- case 0:
- printk("enable_irq(%u) unbalanced from %p\n", irq, __builtin_return_address(0));
- }
+ __set_MASK(irqlevel);
+ __clr_RC(irqlevel);
+ __clr_IRL();
+}
- if (group->control)
- group->disable_cnt[idx] = count;
- else
- level->disable_count = count;
+static void frv_cpupic_unmask(unsigned int irqlevel)
+{
+ __clr_MASK(irqlevel);
+}
- spin_unlock_irqrestore(&level->lock, flags);
+static void frv_cpupic_end(unsigned int irqlevel)
+{
+ __clr_MASK(irqlevel);
}
-EXPORT_SYMBOL(enable_irq);
+static struct irq_chip frv_cpu_pic = {
+ .name = "cpu",
+ .ack = frv_cpupic_ack,
+ .mask = frv_cpupic_mask,
+ .mask_ack = frv_cpupic_mask_ack,
+ .unmask = frv_cpupic_unmask,
+ .end = frv_cpupic_end,
+};
-/*****************************************************************************/
/*
* handles all normal device IRQ's
* - registers are referred to by the __frame variable (GR28)
@@ -281,463 +142,65 @@ EXPORT_SYMBOL(enable_irq);
*/
asmlinkage void do_IRQ(void)
{
- struct irq_source *source;
- int level, cpu;
-
irq_enter();
-
- level = (__frame->tbr >> 4) & 0xf;
- cpu = smp_processor_id();
-
- if ((unsigned long) __frame - (unsigned long) (current + 1) < 512)
- BUG();
-
- __set_MASK(level);
- __clr_RC(level);
- __clr_IRL();
-
- kstat_this_cpu.irqs[level]++;
-
- for (source = frv_irq_levels[level].sources; source; source = source->next)
- source->doirq(source);
-
- __clr_MASK(level);
-
+ generic_handle_irq(__get_IRL(), __frame);
irq_exit();
+}
-} /* end do_IRQ() */
-
-/*****************************************************************************/
/*
* handles all NMIs when not co-opted by the debugger
* - registers are referred to by the __frame variable (GR28)
*/
asmlinkage void do_NMI(void)
{
-} /* end do_NMI() */
-
-/*****************************************************************************/
-/**
- * request_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
- *
- * IRQF_SHARED Interrupt is shared
- *
- * IRQF_DISABLED Disable local interrupts while processing
- *
- * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
- *
- */
-
-int request_irq(unsigned int irq,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long irqflags,
- const char * devname,
- void *dev_id)
-{
- int retval;
- struct irqaction *action;
-
-#if 1
- /*
- * Sanity-check: shared interrupts should REALLY pass in
- * a real dev-ID, otherwise we'll have trouble later trying
- * to figure out which interrupt is which (messes up the
- * interrupt freeing logic etc).
- */
- if (irqflags & IRQF_SHARED) {
- if (!dev_id)
- printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n",
- devname, (&irq)[-1]);
- }
-#endif
-
- if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS)
- return -EINVAL;
- if (!handler)
- return -EINVAL;
-
- action = (struct irqaction *) kmalloc(sizeof(struct irqaction), GFP_KERNEL);
- if (!action)
- return -ENOMEM;
-
- action->handler = handler;
- action->flags = irqflags;
- action->mask = CPU_MASK_NONE;
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- retval = setup_irq(irq, action);
- if (retval)
- kfree(action);
- return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function. The function
- * does not return until any executing interrupts for this IRQ
- * have completed.
- *
- * This function may be called from interrupt context.
- *
- * Bugs: Attempting to free an irq in a handler for the same irq hangs
- * the machine.
- */
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- struct irq_source *source;
- struct irq_group *group;
- struct irq_level *level;
- struct irqaction **p, **pp;
- unsigned long flags;
-
- if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS)
- return;
-
- group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
- if (!group)
- BUG();
-
- source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
- if (!source)
- BUG();
-
- level = source->level;
- p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
-
- spin_lock_irqsave(&level->lock, flags);
-
- for (pp = p; *pp; pp = &(*pp)->next) {
- struct irqaction *action = *pp;
-
- if (action->dev_id != dev_id)
- continue;
-
- /* found it - remove from the list of entries */
- *pp = action->next;
-
- level->usage--;
-
- if (p == pp && group->control)
- group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 0);
-
- if (level->usage == 0)
- __set_MASK(level - frv_irq_levels);
-
- spin_unlock_irqrestore(&level->lock,flags);
-
-#ifdef CONFIG_SMP
- /* Wait to make sure it's not being used on another CPU */
- while (desc->status & IRQ_INPROGRESS)
- barrier();
-#endif
- kfree(action);
- return;
- }
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * IRQ autodetection code..
- *
- * This depends on the fact that any interrupt that comes in on to an
- * unassigned IRQ will cause GxICR_DETECT to be set
- */
-
-static DECLARE_MUTEX(probe_sem);
-
-/**
- * probe_irq_on - begin an interrupt autodetect
- *
- * Commence probing for an interrupt. The interrupts are scanned
- * and a mask of potential interrupt lines is returned.
- *
- */
-
-unsigned long probe_irq_on(void)
-{
- down(&probe_sem);
- return 0;
}
-EXPORT_SYMBOL(probe_irq_on);
-
/*
- * Return a mask of triggered interrupts (this
- * can handle only legacy ISA interrupts).
- */
-
-/**
- * probe_irq_mask - scan a bitmap of interrupt lines
- * @val: mask of interrupts to consider
- *
- * Scan the ISA bus interrupt lines and return a bitmap of
- * active interrupts. The interrupt probe logic state is then
- * returned to its previous value.
- *
- * Note: we need to scan all the irq's even though we will
- * only return ISA irq numbers - just so that we reset them
- * all to a known state.
- */
-unsigned int probe_irq_mask(unsigned long xmask)
-{
- up(&probe_sem);
- return 0;
-}
-
-EXPORT_SYMBOL(probe_irq_mask);
-
-/*
- * Return the one interrupt that triggered (this can
- * handle any interrupt source).
- */
-
-/**
- * probe_irq_off - end an interrupt autodetect
- * @xmask: mask of potential interrupts (unused)
- *
- * Scans the unused interrupt lines and returns the line which
- * appears to have triggered the interrupt. If no interrupt was
- * found then zero is returned. If more than one interrupt is
- * found then minus the first candidate is returned to indicate
- * their is doubt.
- *
- * The interrupt probe logic state is returned to its previous
- * value.
- *
- * BUGS: When used in a module (which arguably shouldnt happen)
- * nothing prevents two IRQ probe callers from overlapping. The
- * results of this are non-optimal.
+ * initialise the interrupt system
*/
-
-int probe_irq_off(unsigned long xmask)
-{
- up(&probe_sem);
- return -1;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-/* this was setup_x86_irq but it seems pretty generic */
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
- struct irq_source *source;
- struct irq_group *group;
- struct irq_level *level;
- struct irqaction **p, **pp;
- unsigned long flags;
-
- group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
- if (!group)
- BUG();
-
- source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
- if (!source)
- BUG();
-
- level = source->level;
-
- p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
-
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & IRQF_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
-
- /* must juggle the interrupt processing stuff with interrupts disabled */
- spin_lock_irqsave(&level->lock, flags);
-
- /* can't share interrupts unless all parties agree to */
- if (level->usage != 0 && !(level->flags & new->flags & IRQF_SHARED)) {
- spin_unlock_irqrestore(&level->lock,flags);
- return -EBUSY;
- }
-
- /* add new interrupt at end of irq queue */
- pp = p;
- while (*pp)
- pp = &(*pp)->next;
-
- *pp = new;
-
- level->usage++;
- level->flags = new->flags;
-
- /* turn the interrupts on */
- if (level->usage == 1)
- __clr_MASK(level - frv_irq_levels);
-
- if (p == pp && group->control)
- group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 1);
-
- spin_unlock_irqrestore(&level->lock, flags);
- register_irq_proc(irq);
- return 0;
-}
-
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-
-#define HEX_DIGITS 8
-
-static unsigned int parse_hex_value (const char __user *buffer,
- unsigned long count, unsigned long *ret)
-{
- unsigned char hexnum [HEX_DIGITS];
- unsigned long value;
- int i;
-
- if (!count)
- return -EINVAL;
- if (count > HEX_DIGITS)
- count = HEX_DIGITS;
- if (copy_from_user(hexnum, buffer, count))
- return -EFAULT;
-
- /*
- * Parse the first 8 characters as a hex string, any non-hex char
- * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
- */
- value = 0;
-
- for (i = 0; i < count; i++) {
- unsigned int c = hexnum[i];
-
- switch (c) {
- case '0' ... '9': c -= '0'; break;
- case 'a' ... 'f': c -= 'a'-10; break;
- case 'A' ... 'F': c -= 'A'-10; break;
- default:
- goto out;
- }
- value = (value << 4) | c;
- }
-out:
- *ret = value;
- return 0;
-}
-
-
-static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- unsigned long *mask = (unsigned long *) data;
- if (count < HEX_DIGITS+1)
- return -EINVAL;
- return sprintf (page, "%08lx\n", *mask);
-}
-
-static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
- unsigned long count, void *data)
-{
- unsigned long *mask = (unsigned long *) data, full_count = count, err;
- unsigned long new_value;
-
- show_state();
- err = parse_hex_value(buffer, count, &new_value);
- if (err)
- return err;
-
- *mask = new_value;
- return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
- char name [MAX_NAMELEN];
-
- if (!root_irq_dir || irq_dir[irq])
- return;
-
- memset(name, 0, MAX_NAMELEN);
- sprintf(name, "%d", irq);
-
- /* create /proc/irq/1234 */
- irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-}
-
-unsigned long prof_cpu_mask = -1;
-
-void init_irq_proc (void)
+void __init init_IRQ(void)
{
- struct proc_dir_entry *entry;
- int i;
+ int level;
- /* create /proc/irq */
- root_irq_dir = proc_mkdir("irq", NULL);
+ for (level = 1; level <= 14; level++)
+ set_irq_chip_and_handler(level, &frv_cpu_pic,
+ handle_level_irq);
- /* create /proc/irq/prof_cpu_mask */
- entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
- if (!entry)
- return;
+ set_irq_handler(IRQ_CPU_TIMER0, handle_edge_irq);
- entry->nlink = 1;
- entry->data = (void *)&prof_cpu_mask;
- entry->read_proc = prof_cpu_mask_read_proc;
- entry->write_proc = prof_cpu_mask_write_proc;
-
- /*
- * Create entries for all existing IRQs.
+ /* set the trigger levels for internal interrupt sources
+ * - timers all falling-edge
+ * - ERR0 is rising-edge
+ * - all others are high-level
*/
- for (i = 0; i < NR_IRQS; i++)
- register_irq_proc(i);
-}
+ __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 */
+ __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 */
+
+ /* route internal interrupts */
+ set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL,
+ IRQ_DMA0_LEVEL);
+ set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL);
+ set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL,
+ IRQ_UART1_LEVEL, IRQ_UART0_LEVEL);
+ set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL,
+ IRQ_DMA4_LEVEL);
+
+ /* route external interrupts */
+ set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL,
+ IRQ_XIRQ4_LEVEL);
+ set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL,
+ IRQ_XIRQ0_LEVEL);
+
+#if defined(CONFIG_MB93091_VDK)
+ __set_TM1(0x55550000); /* XIRQ7-0 all active low */
+#elif defined(CONFIG_MB93093_PDK)
+ __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */
+#else
+#error dont know external IRQ trigger levels for this setup
+#endif
-/*****************************************************************************/
-/*
- * initialise the interrupt system
- */
-void __init init_IRQ(void)
-{
- route_cpu_irqs();
fpga_init();
#ifdef CONFIG_FUJITSU_MB93493
- route_mb93493_irqs();
+ mb93493_init();
#endif
-} /* end init_IRQ() */
+}
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index af08ccd4ed6e..d96a57e5f030 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -43,7 +43,6 @@
#include <asm/mb-regs.h>
#include <asm/mb93493-regs.h>
#include <asm/gdb-stub.h>
-#include <asm/irq-routing.h>
#include <asm/io.h>
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 68a77fe3bb40..3d0284bccb94 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -26,7 +26,6 @@
#include <asm/timer-regs.h>
#include <asm/mb-regs.h>
#include <asm/mb86943a.h>
-#include <asm/irq-routing.h>
#include <linux/timex.h>
diff --git a/arch/frv/mb93090-mb00/pci-irq.c b/arch/frv/mb93090-mb00/pci-irq.c
index 2278c80bd88c..ba587523c015 100644
--- a/arch/frv/mb93090-mb00/pci-irq.c
+++ b/arch/frv/mb93090-mb00/pci-irq.c
@@ -15,7 +15,6 @@
#include <asm/io.h>
#include <asm/smp.h>
-#include <asm/irq-routing.h>
#include "pci-frv.h"
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index b5b4286f9dd4..3f3a0ed3539b 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -98,7 +98,7 @@ void show_mem(void)
*/
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
/* allocate some pages for kernel housekeeping tasks */
empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index d3d40bdc2d6a..e4f4199f97ab 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -138,7 +138,7 @@ void paging_init(void)
#endif
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b2751eadbc56..6189b0c28d6f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -494,7 +494,7 @@ config HIGHMEM64G
endchoice
choice
- depends on EXPERIMENTAL && !X86_PAE
+ depends on EXPERIMENTAL
prompt "Memory split" if EMBEDDED
default VMSPLIT_3G
help
@@ -516,6 +516,7 @@ choice
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
+ depends on !HIGHMEM
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
@@ -794,6 +795,7 @@ config HOTPLUG_CPU
config COMPAT_VDSO
bool "Compat VDSO support"
default y
+ depends on !PARAVIRT
help
Map the VDSO to the predictable old-style address too.
---help---
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 8591f2fa920c..ff9ce4b5eaa8 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -1154,9 +1154,11 @@ out:
static void set_time(void)
{
+ struct timespec ts;
if (got_clock_diff) { /* Must know time zone in order to set clock */
- xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
- xtime.tv_nsec = 0;
+ ts.tv_sec = get_cmos_time() + clock_cmos_diff;
+ ts.tv_nsec = 0;
+ do_settimeofday(&ts);
}
}
@@ -1232,13 +1234,8 @@ static int suspend(int vetoable)
restore_processor_state();
local_irq_disable();
- write_seqlock(&xtime_lock);
- spin_lock(&i8253_lock);
- reinit_timer();
set_time();
-
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
+ reinit_timer();
if (err == APM_NO_ERROR)
err = APM_SUCCESS;
@@ -1365,9 +1362,7 @@ static void check_events(void)
ignore_bounce = 1;
if ((event != APM_NORMAL_RESUME)
|| (ignore_normal_resume == 0)) {
- write_seqlock_irq(&xtime_lock);
set_time();
- write_sequnlock_irq(&xtime_lock);
device_resume();
pm_send_all(PM_RESUME, (void *)0);
queue_event(event, NULL);
@@ -1383,9 +1378,7 @@ static void check_events(void)
break;
case APM_UPDATE_TIME:
- write_seqlock_irq(&xtime_lock);
set_time();
- write_sequnlock_irq(&xtime_lock);
break;
case APM_CRITICAL_SUSPEND:
@@ -2339,6 +2332,7 @@ static int __init apm_init(void)
ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
if (ret < 0) {
printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
+ remove_proc_entry("apm", NULL);
return -ENOMEM;
}
@@ -2348,7 +2342,13 @@ static int __init apm_init(void)
return 0;
}
- misc_register(&apm_device);
+ /*
+ * Note we don't actually care if the misc_device cannot be registered.
+ * This driver can do its job without it, even if userspace can't
+ * control it. Just log the error.
+ */
+ if (misc_register(&apm_device))
+ printk(KERN_WARNING "apm: Could not register misc device.\n");
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 169ac8e0db68..0b61eed8bbd8 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -243,7 +243,7 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
* has been called.
*/
-static void prepare_set(void)
+static void prepare_set(void) __acquires(set_atomicity_lock)
{
unsigned long cr0;
@@ -274,7 +274,7 @@ static void prepare_set(void)
mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
}
-static void post_set(void)
+static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
__flush_tlb();
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S
index d3ee73a3eee3..ef00bb77d7e4 100644
--- a/arch/i386/kernel/efi_stub.S
+++ b/arch/i386/kernel/efi_stub.S
@@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 54cfeabbc5e4..84278e0093a2 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -145,14 +145,10 @@ real_mode_gdt_entries [3] =
0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
};
-static struct
-{
- unsigned short size __attribute__ ((packed));
- unsigned long long * base __attribute__ ((packed));
-}
-real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
-real_mode_idt = { 0x3ff, NULL },
-no_idt = { 0, NULL };
+static struct Xgt_desc_struct
+real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
+real_mode_idt = { 0x3ff, 0 },
+no_idt = { 0, 0 };
/* This is 16-bit protected mode code to disable paging and the cache,
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index f1682206d304..16d99444cf66 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -53,6 +53,7 @@
#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
+#include <asm/mmzone.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/sections.h>
@@ -934,6 +935,24 @@ static void __init parse_cmdline_early (char ** cmdline_p)
}
/*
+ * reservetop=size reserves a hole at the top of the kernel address space which
+ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
+ * so relocating the fixmap can be done before paging initialization.
+ */
+static int __init parse_reservetop(char *arg)
+{
+ unsigned long address;
+
+ if (!arg)
+ return -EINVAL;
+
+ address = memparse(arg, &arg);
+ reserve_top_address(address);
+ return 0;
+}
+early_param("reservetop", parse_reservetop);
+
+/*
* Callback for efi_memory_walk.
*/
static int __init
@@ -1181,7 +1200,7 @@ static unsigned long __init setup_memory(void)
void __init zone_sizes_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = { 0, };
unsigned int max_dma, low;
max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
@@ -1258,7 +1277,7 @@ void __init setup_bootmem_allocator(void)
*/
find_smp_config();
#endif
-
+ numa_kva_reserve();
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index c10789d7a9d3..465188e2d701 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -634,3 +634,69 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
}
}
+/*
+ * this function sends a 'generic call function' IPI to one other CPU
+ * in the system.
+ *
+ * cpu is a standard Linux logical CPU number.
+ */
+static void
+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = 1;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ wmb();
+ /* Send a message to the target CPU and wait for it to respond */
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (!wait)
+ return;
+
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+}
+
+/*
+ * smp_call_function_single - Run a function on another CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Currently unused.
+ * @wait: If true, wait until function has completed on the other CPU.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute <func>
+ * or is executing or has executed it.
+ */
+
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ /* prevent preemption and reschedule on another processor */
+ int me = get_cpu();
+ if (cpu == me) {
+ WARN_ON(1);
+ put_cpu();
+ return -EBUSY;
+ }
+ spin_lock_bh(&call_lock);
+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
+ spin_unlock_bh(&call_lock);
+ put_cpu();
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index f948419c888a..efe07990e7fc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -642,9 +642,13 @@ static void map_cpu_to_logical_apicid(void)
{
int cpu = smp_processor_id();
int apicid = logical_smp_processor_id();
+ int node = apicid_to_node(apicid);
+
+ if (!node_online(node))
+ node = first_online_node;
cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, apicid_to_node(apicid));
+ map_cpu_to_node(cpu, node);
}
static void unmap_cpu_to_logical_apicid(int cpu)
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index b1809c9a0899..83db411b3aa7 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -42,7 +42,7 @@
#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
-#define MAX_CHUNKS_PER_NODE 4
+#define MAX_CHUNKS_PER_NODE 3
#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
struct node_memory_chunk_s {
unsigned long start_pfn;
@@ -135,9 +135,6 @@ static void __init parse_memory_affinity_structure (char *sratp)
"enabled and removable" : "enabled" ) );
}
-#if MAX_NR_ZONES != 4
-#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
-#endif
/* Take a chunk of pages from page frame cstart to cend and count the number
* of pages in each zone, returned via zones[].
*/
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index edd00f6cee37..1302e4ab3c4f 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -270,16 +270,19 @@ void notify_arch_cmos_timer(void)
mod_timer(&sync_cmos_timer, jiffies + 1);
}
-static long clock_cmos_diff, sleep_start;
+static long clock_cmos_diff;
+static unsigned long sleep_start;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
* Estimate time zone so that set_time can update the clock
*/
- clock_cmos_diff = -get_cmos_time();
+ unsigned long ctime = get_cmos_time();
+
+ clock_cmos_diff = -ctime;
clock_cmos_diff += get_seconds();
- sleep_start = get_cmos_time();
+ sleep_start = ctime;
return 0;
}
@@ -287,18 +290,29 @@ static int timer_resume(struct sys_device *dev)
{
unsigned long flags;
unsigned long sec;
- unsigned long sleep_length;
-
+ unsigned long ctime = get_cmos_time();
+ long sleep_length = (ctime - sleep_start) * HZ;
+ struct timespec ts;
+
+ if (sleep_length < 0) {
+ printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
+ /* The time after the resume must not be earlier than the time
+ * before the suspend or some nasty things will happen
+ */
+ sleep_length = 0;
+ ctime = sleep_start;
+ }
#ifdef CONFIG_HPET_TIMER
if (is_hpet_enabled())
hpet_reenable();
#endif
setup_pit_timer();
- sec = get_cmos_time() + clock_cmos_diff;
- sleep_length = (get_cmos_time() - sleep_start) * HZ;
+
+ sec = ctime + clock_cmos_diff;
+ ts.tv_sec = sec;
+ ts.tv_nsec = 0;
+ do_settimeofday(&ts);
write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
jiffies_64 += sleep_length;
wall_jiffies += sleep_length;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -334,10 +348,11 @@ extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
+ struct timespec ts;
+ ts.tv_sec = get_cmos_time();
+ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+
+ do_settimeofday(&ts);
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
@@ -349,6 +364,7 @@ static void __init hpet_time_init(void)
void __init time_init(void)
{
+ struct timespec ts;
#ifdef CONFIG_HPET_TIMER
if (is_hpet_capable()) {
/*
@@ -359,10 +375,10 @@ void __init time_init(void)
return;
}
#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
+ ts.tv_sec = get_cmos_time();
+ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+
+ do_settimeofday(&ts);
time_init_hook();
}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 14a1376fedd1..6bf14a4e995e 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -301,23 +301,25 @@ int hpet_rtc_timer_init(void)
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
local_irq_save(flags);
+
cnt = hpet_readl(HPET_COUNTER);
cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
hpet_writel(cnt, HPET_T1_CMP);
hpet_t1_cmp = cnt;
- local_irq_restore(flags);
cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_T1_CFG);
+ local_irq_restore(flags);
+
return 1;
}
static void hpet_rtc_timer_reinit(void)
{
- unsigned int cfg, cnt;
+ unsigned int cfg, cnt, ticks_per_int, lost_ints;
if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
cfg = hpet_readl(HPET_T1_CFG);
@@ -332,10 +334,33 @@ static void hpet_rtc_timer_reinit(void)
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
/* It is more accurate to use the comparator value than current count.*/
- cnt = hpet_t1_cmp;
- cnt += hpet_tick*HZ/hpet_rtc_int_freq;
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
+ ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
+ hpet_t1_cmp += ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ /*
+ * If the interrupt handler was delayed too long, the write above tries
+ * to schedule the next interrupt in the past and the hardware would
+ * not interrupt until the counter had wrapped around.
+ * So we have to check that the comparator wasn't set to a past time.
+ */
+ cnt = hpet_readl(HPET_COUNTER);
+ if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
+ lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
+ /* Make sure that, even with the time needed to execute
+ * this code, the next scheduled interrupt has been moved
+ * back to the future: */
+ lost_ints++;
+
+ hpet_t1_cmp += lost_ints * ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ if (PIE_on)
+ PIE_count += lost_ints;
+
+ printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+ hpet_rtc_int_freq);
+ }
}
/*
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7e9edafffd8a..4fcc6690be99 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -313,6 +313,8 @@ void show_registers(struct pt_regs *regs)
*/
if (in_kernel) {
u8 __user *eip;
+ int code_bytes = 64;
+ unsigned char c;
printk("\n" KERN_EMERG "Stack: ");
show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
@@ -320,9 +322,12 @@ void show_registers(struct pt_regs *regs)
printk(KERN_EMERG "Code: ");
eip = (u8 __user *)regs->eip - 43;
- for (i = 0; i < 64; i++, eip++) {
- unsigned char c;
-
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+ /* try starting at EIP */
+ eip = (u8 __user *)regs->eip;
+ code_bytes = 32;
+ }
+ for (i = 0; i < code_bytes; i++, eip++) {
if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
printk(" Bad EIP value.");
break;
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 2d4f1386e2b1..1e7ac1c44ddc 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
jiffies = jiffies_64;
+
+PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(4); /* R__ */
+}
SECTIONS
{
. = __KERNEL_START;
@@ -26,7 +32,7 @@ SECTIONS
KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
- } = 0x9090
+ } :text = 0x9090
_etext = .; /* End of text section */
@@ -48,7 +54,7 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
*(.data)
CONSTRUCTORS
- }
+ } :data
. = ALIGN(4096);
__nosave_begin = .;
@@ -184,4 +190,6 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
+
+ NOTES
}
diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c
index 50f6de6ff64d..f39887359e8e 100644
--- a/arch/i386/mach-voyager/voyager_thread.c
+++ b/arch/i386/mach-voyager/voyager_thread.c
@@ -130,7 +130,6 @@ thread(void *unused)
init_timer(&wakeup_timer);
sigfillset(&current->blocked);
- current->signal->tty = NULL;
printk(KERN_NOTICE "Voyager starting monitor thread\n");
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 5d44f4f5ff59..4de11f508c3a 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -29,8 +29,11 @@
*/
#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
-#define boot_pte_index(address) \
- (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1))
+
+static unsigned long boot_pte_index(unsigned long vaddr)
+{
+ return __pa(vaddr) >> PAGE_SHIFT;
+}
static inline boot_pte_t* boot_vaddr_to_pte(void *address)
{
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 7c392dc553b8..fb5d8b747de4 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
void *node_remap_end_vaddr[MAX_NUMNODES];
void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
+static unsigned long kva_start_pfn;
+static unsigned long kva_pages;
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
@@ -286,7 +287,6 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
- unsigned long reserve_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -298,14 +298,23 @@ unsigned long __init setup_memory(void)
find_max_pfn();
get_memcfg_numa();
- reserve_pages = calculate_numa_remap_pages();
+ kva_pages = calculate_numa_remap_pages();
/* partially used pages are not usable - thus round upwards */
system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
- system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
- printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
- reserve_pages, max_low_pfn + reserve_pages);
+ kva_start_pfn = find_max_low_pfn() - kva_pages;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* Numa kva area is below the initrd */
+ if (LOADER_TYPE && INITRD_START)
+ kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages;
+#endif
+ kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
+
+ system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+ printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
+ kva_start_pfn, max_low_pfn);
printk("max_pfn = %ld\n", max_pfn);
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
@@ -323,7 +332,7 @@ unsigned long __init setup_memory(void)
(ulong) pfn_to_kaddr(max_low_pfn));
for_each_online_node(nid) {
node_remap_start_vaddr[nid] = pfn_to_kaddr(
- highstart_pfn + node_remap_offset[nid]);
+ kva_start_pfn + node_remap_offset[nid]);
/* Init the node remap allocator */
node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
(node_remap_size[nid] * PAGE_SIZE);
@@ -338,7 +347,6 @@ unsigned long __init setup_memory(void)
}
printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn));
- vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
for_each_online_node(nid)
find_max_pfn_node(nid);
@@ -348,13 +356,18 @@ unsigned long __init setup_memory(void)
return max_low_pfn;
}
+void __init numa_kva_reserve(void)
+{
+ reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); /* reserve kva_pages pages starting at kva_start_pfn */
+}
+
void __init zone_sizes_init(void)
{
int nid;
for_each_online_node(nid) {
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
unsigned long *zholes_size;
unsigned int max_dma;
@@ -409,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro)
zone_end_pfn = zone_start_pfn + zone->spanned_pages;
printk("Initializing %s for node %d (%08lx:%08lx)\n",
- zone->name, zone->zone_pgdat->node_id,
+ zone->name, zone_to_nid(zone),
zone_start_pfn, zone_end_pfn);
for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 89e8486aac34..efd0bcdac65d 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -629,6 +629,48 @@ void __init mem_init(void)
(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
);
+#if 1 /* double-sanity-check paranoia */
+ printk("virtual kernel memory layout:\n"
+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+#ifdef CONFIG_HIGHMEM
+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+#endif
+ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
+ FIXADDR_START, FIXADDR_TOP,
+ (FIXADDR_TOP - FIXADDR_START) >> 10,
+
+#ifdef CONFIG_HIGHMEM
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+ (LAST_PKMAP*PAGE_SIZE) >> 10,
+#endif
+
+ VMALLOC_START, VMALLOC_END,
+ (VMALLOC_END - VMALLOC_START) >> 20,
+
+ (unsigned long)__va(0), (unsigned long)high_memory,
+ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
+
+ (unsigned long)&__init_begin, (unsigned long)&__init_end,
+ ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
+
+ (unsigned long)&_etext, (unsigned long)&_edata,
+ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+
+ (unsigned long)&_text, (unsigned long)&_etext,
+ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+
+#ifdef CONFIG_HIGHMEM
+ BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
+ BUG_ON(VMALLOC_END > PKMAP_BASE);
+#endif
+ BUG_ON(VMALLOC_START > VMALLOC_END);
+ BUG_ON((unsigned long)high_memory > VMALLOC_START);
+#endif /* double-sanity-check paranoia */
+
#ifdef CONFIG_X86_PAE
if (!cpu_has_pae)
panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
@@ -657,7 +699,7 @@ void __init mem_init(void)
int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdata = &contig_page_data;
- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+ struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd98768d8764..10126e3f8174 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
+#include <linux/module.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -60,7 +61,9 @@ void show_mem(void)
printk(KERN_INFO "%lu pages writeback\n",
global_page_state(NR_WRITEBACK));
printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
+ printk(KERN_INFO "%lu pages slab\n",
+ global_page_state(NR_SLAB_RECLAIMABLE) +
+ global_page_state(NR_SLAB_UNRECLAIMABLE));
printk(KERN_INFO "%lu pages pagetables\n",
global_page_state(NR_PAGETABLE));
}
@@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
__flush_tlb_one(vaddr);
}
+static int fixmaps;
+#ifndef CONFIG_COMPAT_VDSO
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+#endif
+
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
@@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
return;
}
set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+ fixmaps++;
+}
+
+/**
+ * reserve_top_address - reserve a hole at the top of kernel address space
+ * @reserve: size of the hole to reserve
+ *
+ * Can be used to relocate the fixmap area and poke a hole in the top
+ * of kernel address space to make room for a hypervisor.
+ */
+void reserve_top_address(unsigned long reserve)
+{
+ BUG_ON(fixmaps > 0); /* too late once __set_fixmap() has installed an entry */
+#ifdef CONFIG_COMPAT_VDSO
+ BUG_ON(reserve != 0); /* only a zero-sized hole is accepted in this configuration */
+#else
+ __FIXADDR_TOP = -reserve - PAGE_SIZE; /* new fixmap top sits one page below the reserved hole */
+ __VMALLOC_RESERVE += reserve;
+#endif
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S
index c893b897217f..8a2b50a0aaad 100644
--- a/arch/i386/power/swsusp.S
+++ b/arch/i386/power/swsusp.S
@@ -32,7 +32,7 @@ ENTRY(swsusp_arch_resume)
movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx
movl %ecx, %cr3
- movl pagedir_nosave, %edx
+ movl restore_pblist, %edx
.p2align 4,,7
copy_loop:
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index db274da7dba1..f521f2f60a78 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR
bool
select GENERIC_ALLOCATOR
-config DMA_IS_DMA32
- bool
- default y
-
-config DMA_IS_NORMAL
- bool
- depends on IA64_SGI_SN2
- default y
-
config AUDIT_ARCH
bool
default y
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 0176556aeecc..32c3abededc6 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
{
#ifdef CONFIG_ACPI_NUMA
int pxm_id;
+ int nid;
pxm_id = acpi_get_pxm(handle);
-
/*
- * Assuming that the container driver would have set the proximity
- * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag
+ * We don't have cpu-only-node hotadd. But if the system provides an
+ * SRAT table, the pxm is already known and the node is ready,
+ * so just pxm_to_nid(pxm) is OK.
+ * This code is for systems which don't have a full SRAT
+ * table for the possible cpus.
*/
- node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id);
-
+ nid = acpi_map_pxm_to_node(pxm_id);
node_cpuid[cpu].phys_id = physid;
+ node_cpuid[cpu].nid = nid;
#endif
return (0);
}
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 1cc360c83e7a..20340631179f 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -29,6 +29,36 @@ EXPORT_SYMBOL(cpu_to_node_map);
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+void __cpuinit map_cpu_to_node(int cpu, int nid)
+{
+ int oldnid;
+ if (nid < 0) { /* no node given: just initialize the mapping to node 0 */
+ cpu_to_node_map[cpu] = 0;
+ return;
+ }
+ /* sanity check first: is the cpu already in its current node's mask? */
+ oldnid = cpu_to_node_map[cpu];
+ if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
+ return; /* already mapped, nothing to do */
+ }
+ /* No cpu-driven node hot add yet: nodes are normally created from SRAT
+ at boot time, so fall back to the first online node if nid is offline. */
+ if (!node_online(nid))
+ nid = first_online_node;
+ cpu_to_node_map[cpu] = nid;
+ cpu_set(cpu, node_to_cpu_mask[nid]);
+ return;
+}
+
+void __cpuinit unmap_cpu_from_node(int cpu, int nid)
+{
+ WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); /* warn if cpu is not in nid's cpu mask */
+ WARN_ON(cpu_to_node_map[cpu] != nid); /* warn if cpu is not currently mapped to nid */
+ cpu_to_node_map[cpu] = 0; /* reset the mapping to node 0 */
+ cpu_clear(cpu, node_to_cpu_mask[nid]);
+}
+
+
/**
* build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
*
@@ -49,8 +79,6 @@ void __init build_cpu_to_node_map(void)
node = node_cpuid[i].nid;
break;
}
- cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
- if (node >= 0)
- cpu_set(cpu, node_to_cpu_mask[node]);
+ map_cpu_to_node(cpu, node);
}
}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 84a7e52f56f6..7bb7696e4ce2 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -34,6 +34,7 @@
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
+#include <linux/smp.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/bitops.h>
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index f648c610b10c..05bdf7affb43 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,6 +36,9 @@ int arch_register_cpu(int num)
*/
if (!can_cpei_retarget() && is_cpu_cpei_target(num))
sysfs_cpus[num].cpu.no_control = 1;
+#ifdef CONFIG_NUMA
+ map_cpu_to_node(num, node_cpuid[num].nid);
+#endif
#endif
return register_cpu(&sysfs_cpus[num].cpu, num);
@@ -45,7 +48,8 @@ int arch_register_cpu(int num)
void arch_unregister_cpu(int num)
{
- return unregister_cpu(&sysfs_cpus[num].cpu);
+ unregister_cpu(&sysfs_cpus[num].cpu);
+ unmap_cpu_from_node(num, cpu_to_node(num));
}
EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 4c73a6763669..c58e933694d5 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
/* attempt to allocate a granule's worth of cached memory pages */
- page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO,
+ page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
IA64_GRANULE_SHIFT-PAGE_SHIFT);
if (!page) {
mutex_unlock(&uc_pool->add_chunk_mutex);
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 9a8a29339d2d..b632b9c1e3b3 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -32,9 +32,10 @@
#include <linux/cpumask.h>
#include <linux/smp_lock.h>
#include <linux/nodemask.h>
+#include <linux/smp.h>
+
#include <asm/processor.h>
#include <asm/topology.h>
-#include <asm/smp.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
#include <asm/sal.h>
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index b71348fec1f4..bbd97c85bc5d 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -100,7 +100,7 @@ void free_initrd_mem(unsigned long, unsigned long);
#ifndef CONFIG_DISCONTIGMEM
unsigned long __init zone_sizes_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
unsigned long max_dma;
unsigned long low;
unsigned long start_pfn;
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index e4c233eef195..06e538d1be3a 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -136,7 +136,7 @@ void paging_init(void)
#endif
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c
index 98244d51c154..c4fae8ff4671 100644
--- a/arch/mips/au1000/common/dbdma.c
+++ b/arch/mips/au1000/common/dbdma.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL(au1xxx_ddma_add_device);
*/
u32
au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
- void (*callback)(int, void *, struct pt_regs *), void *callparam)
+ void (*callback)(int, void *), void *callparam)
{
unsigned long flags;
u32 used, chan, rv;
@@ -248,8 +248,10 @@ au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
au1xxx_dbdma_init();
dbdma_initialized = 1;
- if ((stp = find_dbdev_id(srcid)) == NULL) return 0;
- if ((dtp = find_dbdev_id(destid)) == NULL) return 0;
+ if ((stp = find_dbdev_id(srcid)) == NULL)
+ return 0;
+ if ((dtp = find_dbdev_id(destid)) == NULL)
+ return 0;
used = 0;
rv = 0;
@@ -869,7 +871,7 @@ dbdma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
au_sync();
if (ctp->chan_callback)
- (ctp->chan_callback)(irq, ctp->chan_callparam, regs);
+ (ctp->chan_callback)(irq, ctp->chan_callparam);
ctp->cur_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr));
return IRQ_RETVAL(1);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c52497bb102a..5b06349af2d5 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -163,10 +163,10 @@ static int __init page_is_ram(unsigned long pagenr)
void __init paging_init(void)
{
- unsigned long zones_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 };
+ unsigned long zones_size[] = { 0, };
unsigned long max_dma, high, low;
#ifndef CONFIG_FLATMEM
- unsigned long zholes_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 };
+ unsigned long zholes_size[] = { 0, };
unsigned long i, j, pfn;
#endif
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index efe6971fc800..16e5682b01f1 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -19,6 +19,7 @@
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
+#include <linux/highmem.h>
#include <asm/page.h>
#include <asm/sections.h>
@@ -508,7 +509,7 @@ extern unsigned long setup_zero_pages(void);
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
unsigned node;
pagetable_init();
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f2b96f1e0da7..25ad28d63e88 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -551,7 +551,7 @@ void show_mem(void)
printk("Zone list for zone %d on node %d: ", j, i);
for (k = 0; zl->zones[k] != NULL; k++)
- printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name);
+ printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
printk("\n");
}
}
@@ -809,7 +809,7 @@ void __init paging_init(void)
flush_tlb_all_local(NULL);
for (i = 0; i < npmem_ranges; i++) {
- unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 };
+ unsigned long zones_size[MAX_NR_ZONES] = { 0, };
/* We have an IOMMU, so all memory can go into a single
ZONE_DMA zone. */
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index 7369f9a6ad25..69e8f86aa4f8 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -159,8 +159,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
isync
/* Load ptr the list of pages to copy in r3 */
- lis r11,(pagedir_nosave - KERNELBASE)@h
- ori r11,r11,pagedir_nosave@l
+ lis r11,(restore_pblist - KERNELBASE)@h
+ ori r11,r11,restore_pblist@l
lwz r10,0(r11)
/* Copy the pages. This is a very basic implementation, to
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index ab3b0765a64e..8aea3698a77b 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -117,8 +117,7 @@ static void appldata_get_mem_data(void *data)
mem_data->pgpgout = ev[PGPGOUT] >> 1;
mem_data->pswpin = ev[PSWPIN];
mem_data->pswpout = ev[PSWPOUT];
- mem_data->pgalloc = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] +
- ev[PGALLOC_DMA];
+ mem_data->pgalloc = ev[PGALLOC_NORMAL] + ev[PGALLOC_DMA];
mem_data->pgfault = ev[PGFAULT];
mem_data->pgmajfault = ev[PGMAJFAULT];
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 786a44dba5bf..607f50ead1fd 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -15,6 +15,8 @@
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
+#include <linux/swap.h>
+#include <linux/kthread.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -34,18 +36,18 @@ struct cmm_page_array {
unsigned long pages[CMM_NR_PAGES];
};
-static long cmm_pages = 0;
-static long cmm_timed_pages = 0;
-static volatile long cmm_pages_target = 0;
-static volatile long cmm_timed_pages_target = 0;
-static long cmm_timeout_pages = 0;
-static long cmm_timeout_seconds = 0;
+static long cmm_pages;
+static long cmm_timed_pages;
+static volatile long cmm_pages_target;
+static volatile long cmm_timed_pages_target;
+static long cmm_timeout_pages;
+static long cmm_timeout_seconds;
-static struct cmm_page_array *cmm_page_list = NULL;
-static struct cmm_page_array *cmm_timed_page_list = NULL;
+static struct cmm_page_array *cmm_page_list;
+static struct cmm_page_array *cmm_timed_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
-static unsigned long cmm_thread_active = 0;
-static struct work_struct cmm_thread_starter;
+static struct task_struct *cmm_thread_ptr;
static wait_queue_head_t cmm_thread_wait;
static struct timer_list cmm_timer;
@@ -53,71 +55,100 @@ static void cmm_timer_fn(unsigned long);
static void cmm_set_timer(void);
static long
-cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list)
+cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
{
- struct cmm_page_array *pa;
- unsigned long page;
+ struct cmm_page_array *pa, *npa;
+ unsigned long addr;
- pa = *list;
- while (pages) {
- page = __get_free_page(GFP_NOIO);
- if (!page)
+ while (nr) {
+ addr = __get_free_page(GFP_NOIO);
+ if (!addr)
break;
+ spin_lock(&cmm_lock);
+ pa = *list;
if (!pa || pa->index >= CMM_NR_PAGES) {
/* Need a new page for the page list. */
- pa = (struct cmm_page_array *)
+ spin_unlock(&cmm_lock);
+ npa = (struct cmm_page_array *)
__get_free_page(GFP_NOIO);
- if (!pa) {
- free_page(page);
+ if (!npa) {
+ free_page(addr);
break;
}
- pa->next = *list;
- pa->index = 0;
- *list = pa;
+ spin_lock(&cmm_lock);
+ pa = *list;
+ if (!pa || pa->index >= CMM_NR_PAGES) {
+ npa->next = pa;
+ npa->index = 0;
+ pa = npa;
+ *list = pa;
+ } else
+ free_page((unsigned long) npa);
}
- diag10(page);
- pa->pages[pa->index++] = page;
+ diag10(addr);
+ pa->pages[pa->index++] = addr;
(*counter)++;
- pages--;
+ spin_unlock(&cmm_lock);
+ nr--;
}
- return pages;
+ return nr;
}
-static void
-cmm_free_pages(long pages, long *counter, struct cmm_page_array **list)
+static long
+cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
{
struct cmm_page_array *pa;
- unsigned long page;
+ unsigned long addr;
+ spin_lock(&cmm_lock);
pa = *list;
- while (pages) {
+ while (nr) {
if (!pa || pa->index <= 0)
break;
- page = pa->pages[--pa->index];
+ addr = pa->pages[--pa->index];
if (pa->index == 0) {
pa = pa->next;
free_page((unsigned long) *list);
*list = pa;
}
- free_page(page);
+ free_page(addr);
(*counter)--;
- pages--;
+ nr--;
}
+ spin_unlock(&cmm_lock);
+ return nr;
}
+static int cmm_oom_notify(struct notifier_block *self,
+ unsigned long dummy, void *parm)
+{
+ unsigned long *freed = parm;
+ long nr = 256; /* try to give back up to 256 pages per call */
+
+ nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); /* returns the count that could not be freed */
+ if (nr > 0) /* timed list did not cover the request: take the rest from cmm_page_list */
+ nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list);
+ cmm_pages_target = cmm_pages; /* set targets to the new counts so cmm_thread's wait condition sees no difference */
+ cmm_timed_pages_target = cmm_timed_pages;
+ *freed += 256 - nr; /* add the number of pages actually released */
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cmm_oom_nb = {
+ .notifier_call = cmm_oom_notify
+};
+
static int
cmm_thread(void *dummy)
{
int rc;
- daemonize("cmmthread");
while (1) {
rc = wait_event_interruptible(cmm_thread_wait,
(cmm_pages != cmm_pages_target ||
- cmm_timed_pages != cmm_timed_pages_target));
- if (rc == -ERESTARTSYS) {
- /* Got kill signal. End thread. */
- clear_bit(0, &cmm_thread_active);
+ cmm_timed_pages != cmm_timed_pages_target ||
+ kthread_should_stop()));
+ if (kthread_should_stop() || rc == -ERESTARTSYS) {
cmm_pages_target = cmm_pages;
cmm_timed_pages_target = cmm_timed_pages;
break;
@@ -143,16 +174,8 @@ cmm_thread(void *dummy)
}
static void
-cmm_start_thread(void)
-{
- kernel_thread(cmm_thread, NULL, 0);
-}
-
-static void
cmm_kick_thread(void)
{
- if (!test_and_set_bit(0, &cmm_thread_active))
- schedule_work(&cmm_thread_starter);
wake_up(&cmm_thread_wait);
}
@@ -177,21 +200,21 @@ cmm_set_timer(void)
static void
cmm_timer_fn(unsigned long ignored)
{
- long pages;
+ long nr;
- pages = cmm_timed_pages_target - cmm_timeout_pages;
- if (pages < 0)
+ nr = cmm_timed_pages_target - cmm_timeout_pages;
+ if (nr < 0)
cmm_timed_pages_target = 0;
else
- cmm_timed_pages_target = pages;
+ cmm_timed_pages_target = nr;
cmm_kick_thread();
cmm_set_timer();
}
void
-cmm_set_pages(long pages)
+cmm_set_pages(long nr)
{
- cmm_pages_target = pages;
+ cmm_pages_target = nr;
cmm_kick_thread();
}
@@ -202,9 +225,9 @@ cmm_get_pages(void)
}
void
-cmm_add_timed_pages(long pages)
+cmm_add_timed_pages(long nr)
{
- cmm_timed_pages_target += pages;
+ cmm_timed_pages_target += nr;
cmm_kick_thread();
}
@@ -215,9 +238,9 @@ cmm_get_timed_pages(void)
}
void
-cmm_set_timeout(long pages, long seconds)
+cmm_set_timeout(long nr, long seconds)
{
- cmm_timeout_pages = pages;
+ cmm_timeout_pages = nr;
cmm_timeout_seconds = seconds;
cmm_set_timer();
}
@@ -245,7 +268,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[16], *p;
- long pages;
+ long nr;
int len;
if (!*lenp || (*ppos && !write)) {
@@ -260,17 +283,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
return -EFAULT;
buf[sizeof(buf) - 1] = '\0';
cmm_skip_blanks(buf, &p);
- pages = simple_strtoul(p, &p, 0);
+ nr = simple_strtoul(p, &p, 0);
if (ctl == &cmm_table[0])
- cmm_set_pages(pages);
+ cmm_set_pages(nr);
else
- cmm_add_timed_pages(pages);
+ cmm_add_timed_pages(nr);
} else {
if (ctl == &cmm_table[0])
- pages = cmm_get_pages();
+ nr = cmm_get_pages();
else
- pages = cmm_get_timed_pages();
- len = sprintf(buf, "%ld\n", pages);
+ nr = cmm_get_timed_pages();
+ len = sprintf(buf, "%ld\n", nr);
if (len > *lenp)
len = *lenp;
if (copy_to_user(buffer, buf, len))
@@ -286,7 +309,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
- long pages, seconds;
+ long nr, seconds;
int len;
if (!*lenp || (*ppos && !write)) {
@@ -301,10 +324,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
return -EFAULT;
buf[sizeof(buf) - 1] = '\0';
cmm_skip_blanks(buf, &p);
- pages = simple_strtoul(p, &p, 0);
+ nr = simple_strtoul(p, &p, 0);
cmm_skip_blanks(p, &p);
seconds = simple_strtoul(p, &p, 0);
- cmm_set_timeout(pages, seconds);
+ cmm_set_timeout(nr, seconds);
} else {
len = sprintf(buf, "%ld %ld\n",
cmm_timeout_pages, cmm_timeout_seconds);
@@ -357,7 +380,7 @@ static struct ctl_table cmm_dir_table[] = {
static void
cmm_smsg_target(char *from, char *msg)
{
- long pages, seconds;
+ long nr, seconds;
if (strlen(sender) > 0 && strcmp(from, sender) != 0)
return;
@@ -366,27 +389,27 @@ cmm_smsg_target(char *from, char *msg)
if (strncmp(msg, "SHRINK", 6) == 0) {
if (!cmm_skip_blanks(msg + 6, &msg))
return;
- pages = simple_strtoul(msg, &msg, 0);
+ nr = simple_strtoul(msg, &msg, 0);
cmm_skip_blanks(msg, &msg);
if (*msg == '\0')
- cmm_set_pages(pages);
+ cmm_set_pages(nr);
} else if (strncmp(msg, "RELEASE", 7) == 0) {
if (!cmm_skip_blanks(msg + 7, &msg))
return;
- pages = simple_strtoul(msg, &msg, 0);
+ nr = simple_strtoul(msg, &msg, 0);
cmm_skip_blanks(msg, &msg);
if (*msg == '\0')
- cmm_add_timed_pages(pages);
+ cmm_add_timed_pages(nr);
} else if (strncmp(msg, "REUSE", 5) == 0) {
if (!cmm_skip_blanks(msg + 5, &msg))
return;
- pages = simple_strtoul(msg, &msg, 0);
+ nr = simple_strtoul(msg, &msg, 0);
if (!cmm_skip_blanks(msg, &msg))
return;
seconds = simple_strtoul(msg, &msg, 0);
cmm_skip_blanks(msg, &msg);
if (*msg == '\0')
- cmm_set_timeout(pages, seconds);
+ cmm_set_timeout(nr, seconds);
}
}
#endif
@@ -396,21 +419,49 @@ struct ctl_table_header *cmm_sysctl_header;
static int
cmm_init (void)
{
+ int rc = -ENOMEM;
+
#ifdef CONFIG_CMM_PROC
cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1);
+ if (!cmm_sysctl_header)
+ goto out;
#endif
#ifdef CONFIG_CMM_IUCV
- smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
+ rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
+ if (rc < 0)
+ goto out_smsg;
#endif
- INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL);
+ rc = register_oom_notifier(&cmm_oom_nb);
+ if (rc < 0)
+ goto out_oom_notify;
init_waitqueue_head(&cmm_thread_wait);
init_timer(&cmm_timer);
- return 0;
+ cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+ rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0;
+ if (!rc)
+ goto out;
+ /*
+ * kthread_run failed. Undo all the stuff from above again.
+ */
+ unregister_oom_notifier(&cmm_oom_nb);
+
+out_oom_notify:
+#ifdef CONFIG_CMM_IUCV
+ smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+out_smsg:
+#endif
+#ifdef CONFIG_CMM_PROC
+ unregister_sysctl_table(cmm_sysctl_header);
+#endif
+out:
+ return rc;
}
static void
cmm_exit(void)
{
+ kthread_stop(cmm_thread_ptr);
+ unregister_oom_notifier(&cmm_oom_nb);
cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
#ifdef CONFIG_CMM_PROC
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index ad8ed7d41e16..bf94eedb0a8e 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -30,7 +30,7 @@
#define __pte_offset(address) \
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
+#define pte_offset(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
__pte_offset(address))
static inline void cache_wback_all(void)
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c
index 1169757fb38b..83295bd21aa7 100644
--- a/arch/sh64/mm/init.c
+++ b/arch/sh64/mm/init.c
@@ -110,7 +110,7 @@ void show_mem(void)
*/
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
pgd_init((unsigned long)swapper_pg_dir);
pgd_init((unsigned long)swapper_pg_dir +
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 16e13f663ab0..b27a506309ee 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -2175,7 +2175,7 @@ void __init ld_mmu_srmmu(void)
BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(pgd_page, srmmu_pgd_page, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM);
BTFIXUPSET_SETHI(none_mask, 0xF0000000);
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 7fdddf3c7e16..436021ceb2e7 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -2280,5 +2280,5 @@ void __init ld_mmu_sun4c(void)
/* These should _never_ get called with two level tables. */
BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP);
- BTFIXUPSET_CALL(pgd_page, sun4c_pgd_page, BTFIXUPCALL_RETO0);
+ BTFIXUPSET_CALL(pgd_page_vaddr, sun4c_pgd_page, BTFIXUPCALL_RETO0);
}
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 8135ec322c9c..642541769a17 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -736,20 +736,15 @@ struct exec_domain solaris_exec_domain = {
extern int init_socksys(void);
-#ifdef MODULE
-
MODULE_AUTHOR("Jakub Jelinek (jj@ultra.linux.cz), Patrik Rak (prak3264@ss1000.ms.mff.cuni.cz)");
MODULE_DESCRIPTION("Solaris binary emulation module");
MODULE_LICENSE("GPL");
-#ifdef __sparc_v9__
extern u32 tl0_solaris[8];
#define update_ttable(x) \
tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \
wmb(); \
__asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3]))
-#else
-#endif
extern u32 solaris_sparc_syscall[];
extern u32 solaris_syscall[];
@@ -757,7 +752,7 @@ extern void cleanup_socksys(void);
extern u32 entry64_personality_patch;
-int init_module(void)
+static int __init solaris_init(void)
{
int ret;
@@ -777,19 +772,12 @@ int init_module(void)
return 0;
}
-void cleanup_module(void)
+static void __exit solaris_exit(void)
{
update_ttable(solaris_syscall);
cleanup_socksys();
unregister_exec_domain(&solaris_exec_domain);
}
-#else
-int init_solaris_emul(void)
-{
- register_exec_domain(&solaris_exec_domain);
- init_socksys();
- return 0;
-}
-#endif
-
+module_init(solaris_init);
+module_exit(solaris_exit);
diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c
index bc3df95bc057..7c90e41fd3be 100644
--- a/arch/sparc64/solaris/socksys.c
+++ b/arch/sparc64/solaris/socksys.c
@@ -168,8 +168,7 @@ static struct file_operations socksys_fops = {
.release = socksys_release,
};
-int __init
-init_socksys(void)
+int __init init_socksys(void)
{
int ret;
struct file * file;
@@ -199,8 +198,7 @@ init_socksys(void)
return 0;
}
-void
-cleanup_socksys(void)
+void __exit cleanup_socksys(void)
{
if (unregister_chrdev(30, "socksys"))
printk ("Couldn't unregister socksys character device\n");
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 7218c754505b..e82764f75e7f 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -544,7 +544,7 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
ops = NULL;
data = NULL;
- for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){
+ for(i = 0; i < ARRAY_SIZE(chan_table); i++){
entry = &chan_table[i];
if(!strncmp(str, entry->key, strlen(entry->key))){
ops = entry->ops;
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index b414522f7686..79610b5ce67e 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -497,7 +497,7 @@ static void mconsole_get_config(int (*get_config)(char *, char *, int,
}
error = NULL;
- size = sizeof(default_buf)/sizeof(default_buf[0]);
+ size = ARRAY_SIZE(default_buf);
buf = default_buf;
while(1){
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 9bfd405c3bd8..5b2f5fe9e426 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -16,6 +16,7 @@
#include "user.h"
#include "mconsole.h"
#include "umid.h"
+#include "user_util.h"
static struct mconsole_command commands[] = {
/* With uts namespaces, uts information becomes process-specific, so
@@ -65,14 +66,14 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req)
struct mconsole_command *cmd;
int i;
- for(i=0;i<sizeof(commands)/sizeof(commands[0]);i++){
+ for(i = 0; i < ARRAY_SIZE(commands); i++){
cmd = &commands[i];
if(!strncmp(req->request.data, cmd->command,
strlen(cmd->command))){
- return(cmd);
+ return cmd;
}
}
- return(NULL);
+ return NULL;
}
#define MIN(a,b) ((a)<(b) ? (a):(b))
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 501f95675d89..4a7966b21931 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -31,6 +31,11 @@
#include "irq_user.h"
#include "irq_kern.h"
+static inline void set_ether_mac(struct net_device *dev, unsigned char *addr)
+{
+ memcpy(dev->dev_addr, addr, ETH_ALEN); /* copies exactly ETH_ALEN bytes from addr into dev->dev_addr */
+}
+
#define DRIVER_NAME "uml-netdev"
static DEFINE_SPINLOCK(opened_lock);
@@ -242,7 +247,7 @@ static int uml_net_set_mac(struct net_device *dev, void *addr)
struct sockaddr *hwaddr = addr;
spin_lock(&lp->lock);
- memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN);
+ set_ether_mac(dev, hwaddr->sa_data);
spin_unlock(&lp->lock);
return(0);
@@ -790,13 +795,6 @@ void dev_ip_addr(void *d, unsigned char *bin_buf)
memcpy(bin_buf, &in->ifa_address, sizeof(in->ifa_address));
}
-void set_ether_mac(void *d, unsigned char *addr)
-{
- struct net_device *dev = d;
-
- memcpy(dev->dev_addr, addr, ETH_ALEN);
-}
-
struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra)
{
if((skb != NULL) && (skb_tailroom(skb) < extra)){
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index 466ff2c2f918..4c767c7adb96 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -76,7 +76,7 @@ int pcap_setup(char *str, char **mac_out, void *data)
if(host_if != NULL)
init->host_if = host_if;
- for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){
+ for(i = 0; i < ARRAY_SIZE(options); i++){
if(options[i] == NULL)
continue;
if(!strcmp(options[i], "promisc"))
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index b98bdd8e052a..89e1dc835a5b 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -27,7 +27,6 @@ extern int ncpus;
extern char *linux_prog;
extern char *gdb_init;
extern int kmalloc_ok;
-extern int timer_irq_inited;
extern int jail;
extern int nsyscalls;
diff --git a/arch/um/include/longjmp.h b/arch/um/include/longjmp.h
index 1b5c0131a12e..e93c6d3e893b 100644
--- a/arch/um/include/longjmp.h
+++ b/arch/um/include/longjmp.h
@@ -1,9 +1,12 @@
#ifndef __UML_LONGJMP_H
#define __UML_LONGJMP_H
-#include <setjmp.h>
+#include "sysdep/archsetjmp.h"
#include "os.h"
+extern int setjmp(jmp_buf);
+extern void longjmp(jmp_buf, int);
+
#define UML_LONGJMP(buf, val) do { \
longjmp(*buf, val); \
} while(0)
diff --git a/arch/um/include/net_user.h b/arch/um/include/net_user.h
index 800c403920bc..47ef7cb49a8e 100644
--- a/arch/um/include/net_user.h
+++ b/arch/um/include/net_user.h
@@ -26,7 +26,6 @@ struct net_user_info {
extern void ether_user_init(void *data, void *dev);
extern void dev_ip_addr(void *d, unsigned char *bin_buf);
-extern void set_ether_mac(void *d, unsigned char *addr);
extern void iter_addresses(void *d, void (*cb)(unsigned char *,
unsigned char *, void *),
void *arg);
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 5316e8a4a4fd..24fb6d8680e1 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -276,9 +276,11 @@ extern int setjmp_wrapper(void (*proc)(void *, void *), ...);
extern void switch_timers(int to_real);
extern void idle_sleep(int secs);
+extern int set_interval(int is_virtual);
+#ifdef CONFIG_MODE_TT
extern void enable_timer(void);
+#endif
extern void disable_timer(void);
-extern void user_time_init(void);
extern void uml_idle_timer(void);
extern unsigned long long os_nsecs(void);
@@ -329,6 +331,7 @@ extern void os_set_ioignore(void);
extern void init_irq_signals(int on_sigstack);
/* sigio.c */
+extern int add_sigio_fd(int fd);
extern int ignore_sigio_fd(int fd);
extern void maybe_sigio_broken(int fd, int read);
diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h
index 83b688ca198f..f845b3629a6d 100644
--- a/arch/um/include/registers.h
+++ b/arch/um/include/registers.h
@@ -7,6 +7,7 @@
#define __REGISTERS_H
#include "sysdep/ptrace.h"
+#include "sysdep/archsetjmp.h"
extern void init_thread_registers(union uml_pt_regs *to);
extern int save_fp_registers(int pid, unsigned long *fp_regs);
@@ -15,6 +16,6 @@ extern void save_registers(int pid, union uml_pt_regs *regs);
extern void restore_registers(int pid, union uml_pt_regs *regs);
extern void init_registers(int pid);
extern void get_safe_registers(unsigned long * regs, unsigned long * fp_regs);
-extern void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer);
+extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
#endif
diff --git a/arch/um/include/sysdep-i386/archsetjmp.h b/arch/um/include/sysdep-i386/archsetjmp.h
new file mode 100644
index 000000000000..ea1ba3d42aee
--- /dev/null
+++ b/arch/um/include/sysdep-i386/archsetjmp.h
@@ -0,0 +1,19 @@
+/*
+ * arch/i386/include/klibc/archsetjmp.h
+ */
+
+#ifndef _KLIBC_ARCHSETJMP_H
+#define _KLIBC_ARCHSETJMP_H
+
+struct __jmp_buf { /* NOTE(review): presumably the registers saved by setjmp() - confirm */
+ unsigned int __ebx;
+ unsigned int __esp;
+ unsigned int __ebp;
+ unsigned int __esi;
+ unsigned int __edi;
+ unsigned int __eip;
+};
+
+typedef struct __jmp_buf jmp_buf[1]; /* one-element array type, so a jmp_buf argument is passed as a pointer */
+
+#endif /* _KLIBC_ARCHSETJMP_H */
diff --git a/arch/um/include/sysdep-i386/signal.h b/arch/um/include/sysdep-i386/signal.h
deleted file mode 100644
index 07518b162136..000000000000
--- a/arch/um/include/sysdep-i386/signal.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2004 PathScale, Inc
- * Licensed under the GPL
- */
-
-#ifndef __I386_SIGNAL_H_
-#define __I386_SIGNAL_H_
-
-#include <signal.h>
-
-#define ARCH_SIGHDLR_PARAM int sig
-
-#define ARCH_GET_SIGCONTEXT(sc, sig) \
- do sc = (struct sigcontext *) (&sig + 1); while(0)
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/sysdep-x86_64/archsetjmp.h b/arch/um/include/sysdep-x86_64/archsetjmp.h
new file mode 100644
index 000000000000..454fc60aff6d
--- /dev/null
+++ b/arch/um/include/sysdep-x86_64/archsetjmp.h
@@ -0,0 +1,21 @@
+/*
+ * arch/x86_64/include/klibc/archsetjmp.h
+ */
+
+#ifndef _KLIBC_ARCHSETJMP_H
+#define _KLIBC_ARCHSETJMP_H
+
+struct __jmp_buf { /* NOTE(review): presumably the registers saved by setjmp() - confirm */
+ unsigned long __rbx;
+ unsigned long __rsp;
+ unsigned long __rbp;
+ unsigned long __r12;
+ unsigned long __r13;
+ unsigned long __r14;
+ unsigned long __r15;
+ unsigned long __rip;
+};
+
+typedef struct __jmp_buf jmp_buf[1]; /* one-element array type, so a jmp_buf argument is passed as a pointer */
+
+#endif /* _KLIBC_ARCHSETJMP_H */
diff --git a/arch/um/include/sysdep-x86_64/signal.h b/arch/um/include/sysdep-x86_64/signal.h
deleted file mode 100644
index 6142897af3d1..000000000000
--- a/arch/um/include/sysdep-x86_64/signal.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2004 PathScale, Inc
- * Licensed under the GPL
- */
-
-#ifndef __X86_64_SIGNAL_H_
-#define __X86_64_SIGNAL_H_
-
-#define ARCH_SIGHDLR_PARAM int sig
-
-#define ARCH_GET_SIGCONTEXT(sc, sig_addr) \
- do { \
- struct ucontext *__uc; \
- asm("movq %%rdx, %0" : "=r" (__uc)); \
- sc = (struct sigcontext *) &__uc->uc_mcontext; \
- } while(0)
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index fc38a6d5906d..0561c43b4685 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -41,9 +41,11 @@ static long execve1(char *file, char __user * __user *argv,
long error;
#ifdef CONFIG_TTY_LOG
- task_lock(current);
+ mutex_lock(&tty_mutex);
+ task_lock(current); /* FIXME: is this needed ? */
log_exec(argv, current->signal->tty);
task_unlock(current);
+ mutex_unlock(&tty_mutex);
#endif
error = do_execve(file, argv, env, &current->thread.regs);
if (error == 0){
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 589c69a75043..ce7f233fc490 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -142,19 +142,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
.events = events,
.current_events = 0 } );
- /* Critical section - locked by a spinlock because this stuff can
- * be changed from interrupt handlers. The stuff above is done
- * outside the lock because it allocates memory.
- */
-
- /* Actually, it only looks like it can be called from interrupt
- * context. The culprit is reactivate_fd, which calls
- * maybe_sigio_broken, which calls write_sigio_workaround,
- * which calls activate_fd. However, write_sigio_workaround should
- * only be called once, at boot time. That would make it clear that
- * this is called only from process context, and can be locked with
- * a semaphore.
- */
spin_lock_irqsave(&irq_lock, flags);
for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
@@ -165,7 +152,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
}
}
- /*-------------*/
if (type == IRQ_WRITE)
fd = -1;
@@ -198,7 +184,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
spin_lock_irqsave(&irq_lock, flags);
}
- /*-------------*/
*last_irq_ptr = new_fd;
last_irq_ptr = &new_fd->next;
@@ -210,14 +195,14 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
*/
maybe_sigio_broken(fd, (type == IRQ_READ));
- return(0);
+ return 0;
out_unlock:
spin_unlock_irqrestore(&irq_lock, flags);
out_kfree:
kfree(new_fd);
out:
- return(err);
+ return err;
}
static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
@@ -302,10 +287,7 @@ void reactivate_fd(int fd, int irqnum)
os_set_pollfd(i, irq->fd);
spin_unlock_irqrestore(&irq_lock, flags);
- /* This calls activate_fd, so it has to be outside the critical
- * section.
- */
- maybe_sigio_broken(fd, (irq->type == IRQ_READ));
+ add_sigio_fd(fd);
}
void deactivate_fd(int fd, int irqnum)
@@ -316,11 +298,15 @@ void deactivate_fd(int fd, int irqnum)
spin_lock_irqsave(&irq_lock, flags);
irq = find_irq_by_fd(fd, irqnum, &i);
- if (irq == NULL)
- goto out;
+ if(irq == NULL){
+ spin_unlock_irqrestore(&irq_lock, flags);
+ return;
+ }
+
os_set_pollfd(i, -1);
- out:
spin_unlock_irqrestore(&irq_lock, flags);
+
+ ignore_sigio_fd(fd);
}
int deactivate_all_fds(void)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 61280167c560..93121c6d26e5 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -79,8 +79,10 @@ void mem_init(void)
/* this will put all low memory onto the freelists */
totalram_pages = free_all_bootmem();
+#ifdef CONFIG_HIGHMEM
totalhigh_pages = highmem >> PAGE_SHIFT;
totalram_pages += totalhigh_pages;
+#endif
num_physpages = totalram_pages;
max_pfn = totalram_pages;
printk(KERN_INFO "Memory: %luk available\n",
@@ -221,10 +223,13 @@ void paging_init(void)
empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
- for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++)
+ for(i = 0; i < ARRAY_SIZE(zones_size); i++)
zones_size[i] = 0;
+
zones_size[ZONE_DMA] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT);
+#ifdef CONFIG_HIGHMEM
zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT;
+#endif
free_area_init(zones_size);
/*
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index f6a5a502120b..537895d68ad1 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -23,6 +23,7 @@
#include "linux/proc_fs.h"
#include "linux/ptrace.h"
#include "linux/random.h"
+#include "linux/personality.h"
#include "asm/unistd.h"
#include "asm/mman.h"
#include "asm/segment.h"
@@ -476,7 +477,7 @@ int singlestepping(void * t)
#ifndef arch_align_stack
unsigned long arch_align_stack(unsigned long sp)
{
- if (randomize_va_space)
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 3ef73bf2e781..f602623644aa 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -22,7 +22,7 @@ static void kill_idlers(int me)
struct task_struct *p;
int i;
- for(i = 0; i < sizeof(idle_threads)/sizeof(idle_threads[0]); i++){
+ for(i = 0; i < ARRAY_SIZE(idle_threads); i++){
p = idle_threads[i];
if((p != NULL) && (p->thread.mode.tt.extern_pid != me))
os_kill_process(p->thread.mode.tt.extern_pid, 0);
@@ -62,14 +62,3 @@ void machine_halt(void)
{
machine_power_off();
}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 624ca238d1fd..79c22707a637 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -55,7 +55,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
* destroy_context_skas.
*/
- mm->context.skas.last_page_table = pmd_page_kernel(*pmd);
+ mm->context.skas.last_page_table = pmd_page_vaddr(*pmd);
#ifdef CONFIG_3_LEVEL_PGTABLES
mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
#endif
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 552ca1cb9847..2454bbd9555d 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -35,9 +35,6 @@ unsigned long long sched_clock(void)
return (unsigned long long)jiffies_64 * (1000000000 / HZ);
}
-/* Changed at early boot */
-int timer_irq_inited = 0;
-
static unsigned long long prev_nsecs;
#ifdef CONFIG_UML_REAL_TIME_CLOCK
static long long delta; /* Deviation per interval */
@@ -113,12 +110,13 @@ static void register_timer(void)
err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL);
if(err != 0)
- printk(KERN_ERR "timer_init : request_irq failed - "
+ printk(KERN_ERR "register_timer : request_irq failed - "
"errno = %d\n", -err);
- timer_irq_inited = 1;
-
- user_time_init();
+ err = set_interval(1);
+ if(err != 0)
+ printk(KERN_ERR "register_timer : set_interval failed - "
+ "errno = %d\n", -err);
}
extern void (*late_time_init)(void);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index f5b0636f9ad7..54a5ff25645a 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
* Licensed under the GPL
*/
@@ -16,12 +16,12 @@
#include "os.h"
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
- int r, int w, int x, struct host_vm_op *ops, int *index,
+ int r, int w, int x, struct host_vm_op *ops, int *index,
int last_filled, union mm_context *mmu, void **flush,
int (*do_ops)(union mm_context *, struct host_vm_op *,
int, int, void **))
{
- __u64 offset;
+ __u64 offset;
struct host_vm_op *last;
int fd, ret = 0;
@@ -89,7 +89,7 @@ static int add_munmap(unsigned long addr, unsigned long len,
static int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
int x, struct host_vm_op *ops, int *index,
int last_filled, union mm_context *mmu, void **flush,
- int (*do_ops)(union mm_context *, struct host_vm_op *,
+ int (*do_ops)(union mm_context *, struct host_vm_op *,
int, int, void **))
{
struct host_vm_op *last;
@@ -124,105 +124,105 @@ static int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force,
+ unsigned long end_addr, int force,
int (*do_ops)(union mm_context *, struct host_vm_op *,
int, int, void **))
{
- pgd_t *npgd;
- pud_t *npud;
- pmd_t *npmd;
- pte_t *npte;
- union mm_context *mmu = &mm->context;
- unsigned long addr, end;
- int r, w, x;
- struct host_vm_op ops[1];
- void *flush = NULL;
- int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1;
- int ret = 0;
-
- if(mm == NULL) return;
-
- ops[0].type = NONE;
- for(addr = start_addr; addr < end_addr && !ret;){
- npgd = pgd_offset(mm, addr);
- if(!pgd_present(*npgd)){
- end = ADD_ROUND(addr, PGDIR_SIZE);
- if(end > end_addr)
- end = end_addr;
- if(force || pgd_newpage(*npgd)){
- ret = add_munmap(addr, end - addr, ops,
- &op_index, last_op, mmu,
- &flush, do_ops);
- pgd_mkuptodate(*npgd);
- }
- addr = end;
- continue;
- }
-
- npud = pud_offset(npgd, addr);
- if(!pud_present(*npud)){
- end = ADD_ROUND(addr, PUD_SIZE);
- if(end > end_addr)
- end = end_addr;
- if(force || pud_newpage(*npud)){
- ret = add_munmap(addr, end - addr, ops,
- &op_index, last_op, mmu,
- &flush, do_ops);
- pud_mkuptodate(*npud);
- }
- addr = end;
- continue;
- }
-
- npmd = pmd_offset(npud, addr);
- if(!pmd_present(*npmd)){
- end = ADD_ROUND(addr, PMD_SIZE);
- if(end > end_addr)
- end = end_addr;
- if(force || pmd_newpage(*npmd)){
- ret = add_munmap(addr, end - addr, ops,
- &op_index, last_op, mmu,
- &flush, do_ops);
- pmd_mkuptodate(*npmd);
- }
- addr = end;
- continue;
- }
-
- npte = pte_offset_kernel(npmd, addr);
- r = pte_read(*npte);
- w = pte_write(*npte);
- x = pte_exec(*npte);
+ pgd_t *npgd;
+ pud_t *npud;
+ pmd_t *npmd;
+ pte_t *npte;
+ union mm_context *mmu = &mm->context;
+ unsigned long addr, end;
+ int r, w, x;
+ struct host_vm_op ops[1];
+ void *flush = NULL;
+ int op_index = -1, last_op = ARRAY_SIZE(ops) - 1;
+ int ret = 0;
+
+ if(mm == NULL)
+ return;
+
+ ops[0].type = NONE;
+ for(addr = start_addr; addr < end_addr && !ret;){
+ npgd = pgd_offset(mm, addr);
+ if(!pgd_present(*npgd)){
+ end = ADD_ROUND(addr, PGDIR_SIZE);
+ if(end > end_addr)
+ end = end_addr;
+ if(force || pgd_newpage(*npgd)){
+ ret = add_munmap(addr, end - addr, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
+ pgd_mkuptodate(*npgd);
+ }
+ addr = end;
+ continue;
+ }
+
+ npud = pud_offset(npgd, addr);
+ if(!pud_present(*npud)){
+ end = ADD_ROUND(addr, PUD_SIZE);
+ if(end > end_addr)
+ end = end_addr;
+ if(force || pud_newpage(*npud)){
+ ret = add_munmap(addr, end - addr, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
+ pud_mkuptodate(*npud);
+ }
+ addr = end;
+ continue;
+ }
+
+ npmd = pmd_offset(npud, addr);
+ if(!pmd_present(*npmd)){
+ end = ADD_ROUND(addr, PMD_SIZE);
+ if(end > end_addr)
+ end = end_addr;
+ if(force || pmd_newpage(*npmd)){
+ ret = add_munmap(addr, end - addr, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
+ pmd_mkuptodate(*npmd);
+ }
+ addr = end;
+ continue;
+ }
+
+ npte = pte_offset_kernel(npmd, addr);
+ r = pte_read(*npte);
+ w = pte_write(*npte);
+ x = pte_exec(*npte);
if (!pte_young(*npte)) {
r = 0;
w = 0;
} else if (!pte_dirty(*npte)) {
w = 0;
}
- if(force || pte_newpage(*npte)){
- if(pte_present(*npte))
- ret = add_mmap(addr,
- pte_val(*npte) & PAGE_MASK,
- PAGE_SIZE, r, w, x, ops,
- &op_index, last_op, mmu,
- &flush, do_ops);
+ if(force || pte_newpage(*npte)){
+ if(pte_present(*npte))
+ ret = add_mmap(addr,
+ pte_val(*npte) & PAGE_MASK,
+ PAGE_SIZE, r, w, x, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
else ret = add_munmap(addr, PAGE_SIZE, ops,
&op_index, last_op, mmu,
&flush, do_ops);
- }
- else if(pte_newprot(*npte))
+ }
+ else if(pte_newprot(*npte))
ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops,
&op_index, last_op, mmu,
&flush, do_ops);
- *npte = pte_mkuptodate(*npte);
- addr += PAGE_SIZE;
- }
-
+ *npte = pte_mkuptodate(*npte);
+ addr += PAGE_SIZE;
+ }
if(!ret)
ret = (*do_ops)(mmu, ops, op_index, 1, &flush);
- /* This is not an else because ret is modified above */
+/* This is not an else because ret is modified above */
if(ret) {
printk("fix_range_common: failed, killing current process\n");
force_sig(SIGKILL, current);
@@ -231,160 +231,160 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
- struct mm_struct *mm;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long addr, last;
- int updated = 0, err;
-
- mm = &init_mm;
- for(addr = start; addr < end;){
- pgd = pgd_offset(mm, addr);
- if(!pgd_present(*pgd)){
- last = ADD_ROUND(addr, PGDIR_SIZE);
- if(last > end)
- last = end;
- if(pgd_newpage(*pgd)){
- updated = 1;
- err = os_unmap_memory((void *) addr,
- last - addr);
- if(err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pud = pud_offset(pgd, addr);
- if(!pud_present(*pud)){
- last = ADD_ROUND(addr, PUD_SIZE);
- if(last > end)
- last = end;
- if(pud_newpage(*pud)){
- updated = 1;
- err = os_unmap_memory((void *) addr,
- last - addr);
- if(err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pmd = pmd_offset(pud, addr);
- if(!pmd_present(*pmd)){
- last = ADD_ROUND(addr, PMD_SIZE);
- if(last > end)
- last = end;
- if(pmd_newpage(*pmd)){
- updated = 1;
- err = os_unmap_memory((void *) addr,
- last - addr);
- if(err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pte = pte_offset_kernel(pmd, addr);
- if(!pte_present(*pte) || pte_newpage(*pte)){
- updated = 1;
- err = os_unmap_memory((void *) addr,
- PAGE_SIZE);
- if(err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- if(pte_present(*pte))
- map_memory(addr,
- pte_val(*pte) & PAGE_MASK,
- PAGE_SIZE, 1, 1, 1);
- }
- else if(pte_newprot(*pte)){
- updated = 1;
- os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
- }
- addr += PAGE_SIZE;
- }
- return(updated);
+ struct mm_struct *mm;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long addr, last;
+ int updated = 0, err;
+
+ mm = &init_mm;
+ for(addr = start; addr < end;){
+ pgd = pgd_offset(mm, addr);
+ if(!pgd_present(*pgd)){
+ last = ADD_ROUND(addr, PGDIR_SIZE);
+ if(last > end)
+ last = end;
+ if(pgd_newpage(*pgd)){
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ last - addr);
+ if(err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pud = pud_offset(pgd, addr);
+ if(!pud_present(*pud)){
+ last = ADD_ROUND(addr, PUD_SIZE);
+ if(last > end)
+ last = end;
+ if(pud_newpage(*pud)){
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ last - addr);
+ if(err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if(!pmd_present(*pmd)){
+ last = ADD_ROUND(addr, PMD_SIZE);
+ if(last > end)
+ last = end;
+ if(pmd_newpage(*pmd)){
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ last - addr);
+ if(err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ }
+ addr = last;
+ continue;
+ }
+
+ pte = pte_offset_kernel(pmd, addr);
+ if(!pte_present(*pte) || pte_newpage(*pte)){
+ updated = 1;
+ err = os_unmap_memory((void *) addr,
+ PAGE_SIZE);
+ if(err < 0)
+ panic("munmap failed, errno = %d\n",
+ -err);
+ if(pte_present(*pte))
+ map_memory(addr,
+ pte_val(*pte) & PAGE_MASK,
+ PAGE_SIZE, 1, 1, 1);
+ }
+ else if(pte_newprot(*pte)){
+ updated = 1;
+ os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
+ }
+ addr += PAGE_SIZE;
+ }
+ return(updated);
}
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
- return(pgd_offset(mm, address));
+ return(pgd_offset(mm, address));
}
pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
- return(pud_offset(pgd, address));
+ return(pud_offset(pgd, address));
}
pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
- return(pmd_offset(pud, address));
+ return(pmd_offset(pud, address));
}
pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
- return(pte_offset_kernel(pmd, address));
+ return(pte_offset_kernel(pmd, address));
}
pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
- pgd_t *pgd = pgd_offset(task->mm, addr);
- pud_t *pud = pud_offset(pgd, addr);
- pmd_t *pmd = pmd_offset(pud, addr);
+ pgd_t *pgd = pgd_offset(task->mm, addr);
+ pud_t *pud = pud_offset(pgd, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
- return(pte_offset_map(pmd, addr));
+ return(pte_offset_map(pmd, addr));
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
- address &= PAGE_MASK;
- flush_tlb_range(vma, address, address + PAGE_SIZE);
+ address &= PAGE_MASK;
+ flush_tlb_range(vma, address, address + PAGE_SIZE);
}
void flush_tlb_all(void)
{
- flush_tlb_mm(current->mm);
+ flush_tlb_mm(current->mm);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt,
- flush_tlb_kernel_range_common, start, end);
+ CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt,
+ flush_tlb_kernel_range_common, start, end);
}
void flush_tlb_kernel_vm(void)
{
- CHOOSE_MODE(flush_tlb_kernel_vm_tt(),
- flush_tlb_kernel_range_common(start_vm, end_vm));
+ CHOOSE_MODE(flush_tlb_kernel_vm_tt(),
+ flush_tlb_kernel_range_common(start_vm, end_vm));
}
void __flush_tlb_one(unsigned long addr)
{
- CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr);
+ CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr);
}
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start,
- end);
+ CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start,
+ end);
}
void flush_tlb_mm(struct mm_struct *mm)
{
- CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm);
+ CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm);
}
void force_flush_all(void)
{
- CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas());
+ CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas());
}
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ac70fa5a2e2a..e5eeaf2b6af1 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -227,9 +227,16 @@ void bad_segv(struct faultinfo fi, unsigned long ip)
void relay_signal(int sig, union uml_pt_regs *regs)
{
- if(arch_handle_signal(sig, regs)) return;
- if(!UPT_IS_USER(regs))
+ if(arch_handle_signal(sig, regs))
+ return;
+
+ if(!UPT_IS_USER(regs)){
+ if(sig == SIGBUS)
+ printk("Bus error - the /dev/shm or /tmp mount likely "
+ "just ran out of space\n");
panic("Kernel mode signal %d", sig);
+ }
+
current->thread.arch.faultinfo = *UPT_FAULTINFO(regs);
force_sig(sig, current);
}
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index 6987d1d247a2..cd15b9df5b5c 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -42,7 +42,7 @@ static int helper_child(void *arg)
if(data->pre_exec != NULL)
(*data->pre_exec)(data->pre_data);
execvp(argv[0], argv);
- errval = errno;
+ errval = -errno;
printk("helper_child - execve of '%s' failed - errno = %d\n", argv[0], errno);
os_write_file(data->fd, &errval, sizeof(errval));
kill(os_getpid(), SIGKILL);
@@ -62,7 +62,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv,
stack = *stack_out;
else stack = alloc_stack(0, __cant_sleep());
if(stack == 0)
- return(-ENOMEM);
+ return -ENOMEM;
ret = os_pipe(fds, 1, 0);
if(ret < 0){
@@ -95,16 +95,16 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv,
/* Read the errno value from the child, if the exec failed, or get 0 if
* the exec succeeded because the pipe fd was set as close-on-exec. */
n = os_read_file(fds[0], &ret, sizeof(ret));
- if (n < 0) {
- printk("run_helper : read on pipe failed, ret = %d\n", -n);
- ret = n;
- kill(pid, SIGKILL);
- CATCH_EINTR(waitpid(pid, NULL, 0));
- } else if(n != 0){
- CATCH_EINTR(n = waitpid(pid, NULL, 0));
- ret = -errno;
- } else {
+ if(n == 0)
ret = pid;
+ else {
+ if(n < 0){
+ printk("run_helper : read on pipe failed, ret = %d\n",
+ -n);
+ ret = n;
+ kill(pid, SIGKILL);
+ }
+ CATCH_EINTR(waitpid(pid, NULL, 0));
}
out_close:
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 7555bf9c33d9..a97206df5b52 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -132,7 +132,7 @@ void os_set_pollfd(int i, int fd)
void os_set_ioignore(void)
{
- set_handler(SIGIO, SIG_IGN, 0, -1);
+ signal(SIGIO, SIG_IGN);
}
void init_irq_signals(int on_sigstack)
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 90912aaca7aa..d1c5670787dc 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -67,13 +67,32 @@ static __init void do_uml_initcalls(void)
static void last_ditch_exit(int sig)
{
- signal(SIGINT, SIG_DFL);
- signal(SIGTERM, SIG_DFL);
- signal(SIGHUP, SIG_DFL);
uml_cleanup();
exit(1);
}
+static void install_fatal_handler(int sig)
+{
+ struct sigaction action;
+
+ /* Route sig to last_ditch_exit with no signals blocked ... */
+ sigemptyset(&action.sa_mask);
+
+ /* ... including the signal being handled, plus we want the
+ * handler reset to the default behavior, so that if an exit
+ * handler is hanging for some reason, the UML will just die
+ * after this signal is sent a second time.
+ */
+ action.sa_flags = SA_RESETHAND | SA_NODEFER;
+ action.sa_restorer = NULL;
+ action.sa_handler = last_ditch_exit;
+ if(sigaction(sig, &action, NULL) < 0){
+ printf("failed to install handler for signal %d - errno = %d\n",
+ sig, errno);
+ exit(1);
+ }
+}
+
#define UML_LIB_PATH ":/usr/lib/uml"
static void setup_env_path(void)
@@ -158,9 +177,12 @@ int main(int argc, char **argv, char **envp)
}
new_argv[argc] = NULL;
- set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1);
- set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1);
- set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1);
+ /* Allow these signals to bring down a UML if all other
+ * methods of control fail.
+ */
+ install_fatal_handler(SIGINT);
+ install_fatal_handler(SIGTERM);
+ install_fatal_handler(SIGHUP);
scan_elf_aux( envp);
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 560c8063c77c..b170b4704dc4 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -114,14 +114,14 @@ static void which_tmpdir(void)
}
while(1){
- found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' ');
+ found = next(fd, buf, ARRAY_SIZE(buf), ' ');
if(found != 1)
break;
if(!strncmp(buf, "/dev/shm", strlen("/dev/shm")))
goto found;
- found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), '\n');
+ found = next(fd, buf, ARRAY_SIZE(buf), '\n');
if(found != 1)
break;
}
@@ -135,7 +135,7 @@ err:
return;
found:
- found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' ');
+ found = next(fd, buf, ARRAY_SIZE(buf), ' ');
if(found != 1)
goto err;
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index b98d3ca2cd1b..ff203625a4bd 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -7,7 +7,6 @@
#include <stdio.h>
#include <errno.h>
#include <signal.h>
-#include <setjmp.h>
#include <linux/unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>
@@ -247,7 +246,17 @@ void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
set_sigstack(sig_stack, pages * page_size());
flags = SA_ONSTACK;
}
- if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1);
+ if(usr1_handler){
+ struct sigaction sa;
+
+ sa.sa_handler = usr1_handler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = flags;
+ sa.sa_restorer = NULL;
+ if(sigaction(SIGUSR1, &sa, NULL) < 0)
+ panic("init_new_thread_stack - sigaction failed - "
+ "errno = %d\n", errno);
+ }
}
void init_new_thread_signals(void)
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 0ecac563c7b3..f6457765b17d 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -43,17 +43,9 @@ struct pollfds {
/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread
* synchronizes with it.
*/
-static struct pollfds current_poll = {
- .poll = NULL,
- .size = 0,
- .used = 0
-};
-
-static struct pollfds next_poll = {
- .poll = NULL,
- .size = 0,
- .used = 0
-};
+static struct pollfds current_poll;
+static struct pollfds next_poll;
+static struct pollfds all_sigio_fds;
static int write_sigio_thread(void *unused)
{
@@ -78,7 +70,8 @@ static int write_sigio_thread(void *unused)
n = os_read_file(sigio_private[1], &c, sizeof(c));
if(n != sizeof(c))
printk("write_sigio_thread : "
- "read failed, err = %d\n", -n);
+ "read on socket failed, "
+ "err = %d\n", -n);
tmp = current_poll;
current_poll = next_poll;
next_poll = tmp;
@@ -93,35 +86,36 @@ static int write_sigio_thread(void *unused)
n = os_write_file(respond_fd, &c, sizeof(c));
if(n != sizeof(c))
- printk("write_sigio_thread : write failed, "
- "err = %d\n", -n);
+ printk("write_sigio_thread : write on socket "
+ "failed, err = %d\n", -n);
}
}
return 0;
}
-static int need_poll(int n)
+static int need_poll(struct pollfds *polls, int n)
{
- if(n <= next_poll.size){
- next_poll.used = n;
- return(0);
+ if(n <= polls->size){
+ polls->used = n;
+ return 0;
}
- kfree(next_poll.poll);
- next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd));
- if(next_poll.poll == NULL){
+ kfree(polls->poll);
+ polls->poll = um_kmalloc_atomic(n * sizeof(struct pollfd));
+ if(polls->poll == NULL){
printk("need_poll : failed to allocate new pollfds\n");
- next_poll.size = 0;
- next_poll.used = 0;
- return(-1);
+ polls->size = 0;
+ polls->used = 0;
+ return -ENOMEM;
}
- next_poll.size = n;
- next_poll.used = n;
- return(0);
+ polls->size = n;
+ polls->used = n;
+ return 0;
}
/* Must be called with sigio_lock held, because it's needed by the marked
- * critical section. */
+ * critical section.
+ */
static void update_thread(void)
{
unsigned long flags;
@@ -156,34 +150,39 @@ static void update_thread(void)
set_signals(flags);
}
-static int add_sigio_fd(int fd, int read)
+int add_sigio_fd(int fd)
{
- int err = 0, i, n, events;
+ struct pollfd *p;
+ int err = 0, i, n;
sigio_lock();
+ for(i = 0; i < all_sigio_fds.used; i++){
+ if(all_sigio_fds.poll[i].fd == fd)
+ break;
+ }
+ if(i == all_sigio_fds.used)
+ goto out;
+
+ p = &all_sigio_fds.poll[i];
+
for(i = 0; i < current_poll.used; i++){
if(current_poll.poll[i].fd == fd)
goto out;
}
n = current_poll.used + 1;
- err = need_poll(n);
+ err = need_poll(&next_poll, n);
if(err)
goto out;
for(i = 0; i < current_poll.used; i++)
next_poll.poll[i] = current_poll.poll[i];
- if(read) events = POLLIN;
- else events = POLLOUT;
-
- next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd,
- .events = events,
- .revents = 0 });
+ next_poll.poll[n - 1] = *p;
update_thread();
out:
sigio_unlock();
- return(err);
+ return err;
}
int ignore_sigio_fd(int fd)
@@ -205,18 +204,14 @@ int ignore_sigio_fd(int fd)
if(i == current_poll.used)
goto out;
- err = need_poll(current_poll.used - 1);
+ err = need_poll(&next_poll, current_poll.used - 1);
if(err)
goto out;
for(i = 0; i < current_poll.used; i++){
p = &current_poll.poll[i];
- if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i];
- }
- if(n == i){
- printk("ignore_sigio_fd : fd %d not found\n", fd);
- err = -1;
- goto out;
+ if(p->fd != fd)
+ next_poll.poll[n++] = *p;
}
update_thread();
@@ -234,7 +229,7 @@ static struct pollfd *setup_initial_poll(int fd)
printk("setup_initial_poll : failed to allocate poll\n");
return NULL;
}
- *p = ((struct pollfd) { .fd = fd,
+ *p = ((struct pollfd) { .fd = fd,
.events = POLLIN,
.revents = 0 });
return p;
@@ -323,6 +318,8 @@ out_close1:
void maybe_sigio_broken(int fd, int read)
{
+ int err;
+
if(!isatty(fd))
return;
@@ -330,7 +327,19 @@ void maybe_sigio_broken(int fd, int read)
return;
write_sigio_workaround();
- add_sigio_fd(fd, read);
+
+ sigio_lock();
+ err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1);
+ if(err){
+ printk("maybe_sigio_broken - failed to add pollfd\n");
+ goto out;
+ }
+ all_sigio_fds.poll[all_sigio_fds.used++] =
+ ((struct pollfd) { .fd = fd,
+ .events = read ? POLLIN : POLLOUT,
+ .revents = 0 });
+out:
+ sigio_unlock();
}
static void sigio_cleanup(void)
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 60e4faedf254..6b81739279d1 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -15,7 +15,6 @@
#include "user.h"
#include "signal_kern.h"
#include "sysdep/sigcontext.h"
-#include "sysdep/signal.h"
#include "sigcontext.h"
#include "mode.h"
#include "os.h"
@@ -38,18 +37,10 @@
static int signals_enabled = 1;
static int pending = 0;
-void sig_handler(ARCH_SIGHDLR_PARAM)
+void sig_handler(int sig, struct sigcontext *sc)
{
- struct sigcontext *sc;
int enabled;
- /* Must be the first thing that this handler does - x86_64 stores
- * the sigcontext in %rdx, and we need to save it before it has a
- * chance to get trashed.
- */
-
- ARCH_GET_SIGCONTEXT(sc, sig);
-
enabled = signals_enabled;
if(!enabled && (sig == SIGIO)){
pending |= SIGIO_MASK;
@@ -64,15 +55,8 @@ void sig_handler(ARCH_SIGHDLR_PARAM)
set_signals(enabled);
}
-extern int timer_irq_inited;
-
static void real_alarm_handler(int sig, struct sigcontext *sc)
{
- if(!timer_irq_inited){
- signals_enabled = 1;
- return;
- }
-
if(sig == SIGALRM)
switch_timers(0);
@@ -84,13 +68,10 @@ static void real_alarm_handler(int sig, struct sigcontext *sc)
}
-void alarm_handler(ARCH_SIGHDLR_PARAM)
+void alarm_handler(int sig, struct sigcontext *sc)
{
- struct sigcontext *sc;
int enabled;
- ARCH_GET_SIGCONTEXT(sc, sig);
-
enabled = signals_enabled;
if(!signals_enabled){
if(sig == SIGVTALRM)
@@ -126,6 +107,10 @@ void remove_sigstack(void)
panic("disabling signal stack failed, errno = %d\n", errno);
}
+void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
+
+extern void hard_handler(int sig);
+
void set_handler(int sig, void (*handler)(int), int flags, ...)
{
struct sigaction action;
@@ -133,13 +118,16 @@ void set_handler(int sig, void (*handler)(int), int flags, ...)
sigset_t sig_mask;
int mask;
- va_start(ap, flags);
- action.sa_handler = handler;
+ handlers[sig] = (void (*)(int, struct sigcontext *)) handler;
+ action.sa_handler = hard_handler;
+
sigemptyset(&action.sa_mask);
- while((mask = va_arg(ap, int)) != -1){
+
+ va_start(ap, flags);
+ while((mask = va_arg(ap, int)) != -1)
sigaddset(&action.sa_mask, mask);
- }
va_end(ap);
+
action.sa_flags = flags;
action.sa_restorer = NULL;
if(sigaction(sig, &action, NULL) < 0)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 7baf90fda58b..42e3d1ed802c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -8,7 +8,6 @@
#include <unistd.h>
#include <errno.h>
#include <signal.h>
-#include <setjmp.h>
#include <sched.h>
#include "ptrace_user.h"
#include <sys/wait.h>
@@ -156,11 +155,15 @@ extern int __syscall_stub_start;
static int userspace_tramp(void *stack)
{
void *addr;
+ int err;
ptrace(PTRACE_TRACEME, 0, 0, 0);
init_new_thread_signals();
- enable_timer();
+ err = set_interval(1);
+ if(err)
+ panic("userspace_tramp - setting timer failed, errno = %d\n",
+ err);
if(!proc_mm){
/* This has a pte, but it can't be mapped in with the usual
@@ -190,14 +193,25 @@ static int userspace_tramp(void *stack)
}
}
if(!ptrace_faultinfo && (stack != NULL)){
+ struct sigaction sa;
+
unsigned long v = UML_CONFIG_STUB_CODE +
(unsigned long) stub_segv_handler -
(unsigned long) &__syscall_stub_start;
set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size());
- set_handler(SIGSEGV, (void *) v, SA_ONSTACK,
- SIGIO, SIGWINCH, SIGALRM, SIGVTALRM,
- SIGUSR1, -1);
+ sigemptyset(&sa.sa_mask);
+ sigaddset(&sa.sa_mask, SIGIO);
+ sigaddset(&sa.sa_mask, SIGWINCH);
+ sigaddset(&sa.sa_mask, SIGALRM);
+ sigaddset(&sa.sa_mask, SIGVTALRM);
+ sigaddset(&sa.sa_mask, SIGUSR1);
+ sa.sa_flags = SA_ONSTACK;
+ sa.sa_handler = (void *) v;
+ sa.sa_restorer = NULL;
+ if(sigaction(SIGSEGV, &sa, NULL) < 0)
+ panic("userspace_tramp - setting SIGSEGV handler "
+ "failed - errno = %d\n", errno);
}
os_stop_process(os_getpid());
@@ -470,7 +484,7 @@ void thread_wait(void *sw, void *fb)
*switch_buf = &buf;
fork_buf = fb;
if(UML_SETJMP(&buf) == 0)
- siglongjmp(*fork_buf, INIT_JMP_REMOVE_SIGSTACK);
+ UML_LONGJMP(fork_buf, INIT_JMP_REMOVE_SIGSTACK);
}
void switch_threads(void *me, void *next)
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 503148504009..7fe92680c7dd 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -14,7 +14,6 @@
#include <sched.h>
#include <fcntl.h>
#include <errno.h>
-#include <setjmp.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/mman.h>
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index b3213613c41c..37806621b25d 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-$(CONFIG_MODE_SKAS) = registers.o tls.o
+obj-$(CONFIG_MODE_SKAS) = registers.o signal.o tls.o
USER_OBJS := $(obj-y)
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c
index 516f66dd87e3..7cd0369e02b3 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/um/os-Linux/sys-i386/registers.c
@@ -5,12 +5,12 @@
#include <errno.h>
#include <string.h>
-#include <setjmp.h>
#include "sysdep/ptrace_user.h"
#include "sysdep/ptrace.h"
#include "uml-config.h"
#include "skas_ptregs.h"
#include "registers.h"
+#include "longjmp.h"
#include "user.h"
/* These are set once at boot time and not changed thereafter */
@@ -130,11 +130,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
HOST_FP_SIZE * sizeof(unsigned long));
}
-void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer)
+unsigned long get_thread_reg(int reg, jmp_buf *buf)
{
- struct __jmp_buf_tag *jmpbuf = buffer;
-
- UPT_SET(uml_regs, EIP, jmpbuf->__jmpbuf[JB_PC]);
- UPT_SET(uml_regs, UESP, jmpbuf->__jmpbuf[JB_SP]);
- UPT_SET(uml_regs, EBP, jmpbuf->__jmpbuf[JB_BP]);
+ switch(reg){
+ case EIP: return buf[0]->__eip;
+ case UESP: return buf[0]->__esp;
+ case EBP: return buf[0]->__ebp;
+ default:
+ printk("get_thread_regs - unknown register %d\n", reg);
+ return 0;
+ }
}
diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c
new file mode 100644
index 000000000000..0d3eae518352
--- /dev/null
+++ b/arch/um/os-Linux/sys-i386/signal.c
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <signal.h>
+
+extern void (*handlers[])(int sig, struct sigcontext *sc);
+
+void hard_handler(int sig)
+{
+ struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
+
+ (*handlers[sig])(sig, sc);
+}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index 340ef26f5944..f67842a7735b 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-$(CONFIG_MODE_SKAS) = registers.o
+obj-$(CONFIG_MODE_SKAS) = registers.o signal.o
USER_OBJS := $(obj-y)
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
index becd898d9398..cb8e8a263280 100644
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ b/arch/um/os-Linux/sys-x86_64/registers.c
@@ -5,11 +5,11 @@
#include <errno.h>
#include <string.h>
-#include <setjmp.h>
#include "ptrace_user.h"
#include "uml-config.h"
#include "skas_ptregs.h"
#include "registers.h"
+#include "longjmp.h"
#include "user.h"
/* These are set once at boot time and not changed thereafter */
@@ -78,11 +78,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
HOST_FP_SIZE * sizeof(unsigned long));
}
-void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer)
+unsigned long get_thread_reg(int reg, jmp_buf *buf)
{
- struct __jmp_buf_tag *jmpbuf = buffer;
-
- UPT_SET(uml_regs, RIP, jmpbuf->__jmpbuf[JB_PC]);
- UPT_SET(uml_regs, RSP, jmpbuf->__jmpbuf[JB_RSP]);
- UPT_SET(uml_regs, RBP, jmpbuf->__jmpbuf[JB_RBP]);
+ switch(reg){
+ case RIP: return buf[0]->__rip;
+ case RSP: return buf[0]->__rsp;
+ case RBP: return buf[0]->__rbp;
+ default:
+ printk("get_thread_regs - unknown register %d\n", reg);
+ return 0;
+ }
}
diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c
new file mode 100644
index 000000000000..3f369e5f976b
--- /dev/null
+++ b/arch/um/os-Linux/sys-x86_64/signal.c
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <signal.h>
+
+extern void (*handlers[])(int sig, struct sigcontext *sc);
+
+void hard_handler(int sig)
+{
+ struct ucontext *uc;
+ asm("movq %%rdx, %0" : "=r" (uc));
+
+ (*handlers[sig])(sig, (struct sigcontext *) &uc->uc_mcontext);
+}
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 4ae73c0e5485..38be096e750f 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -17,20 +17,25 @@
#include "kern_constants.h"
#include "os.h"
-static void set_interval(int timer_type)
+int set_interval(int is_virtual)
{
int usec = 1000000/hz();
+ int timer_type = is_virtual ? ITIMER_VIRTUAL : ITIMER_REAL;
struct itimerval interval = ((struct itimerval) { { 0, usec },
{ 0, usec } });
if(setitimer(timer_type, &interval, NULL) == -1)
- panic("setitimer failed - errno = %d\n", errno);
+ return -errno;
+
+ return 0;
}
+#ifdef CONFIG_MODE_TT
void enable_timer(void)
{
- set_interval(ITIMER_VIRTUAL);
+ set_interval(1);
}
+#endif
void disable_timer(void)
{
@@ -40,8 +45,8 @@ void disable_timer(void)
printk("disnable_timer - setitimer failed, errno = %d\n",
errno);
/* If there are signals already queued, after unblocking ignore them */
- set_handler(SIGALRM, SIG_IGN, 0, -1);
- set_handler(SIGVTALRM, SIG_IGN, 0, -1);
+ signal(SIGALRM, SIG_IGN);
+ signal(SIGVTALRM, SIG_IGN);
}
void switch_timers(int to_real)
@@ -74,7 +79,7 @@ void uml_idle_timer(void)
set_handler(SIGALRM, (__sighandler_t) alarm_handler,
SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
- set_interval(ITIMER_REAL);
+ set_interval(0);
}
#endif
@@ -94,8 +99,3 @@ void idle_sleep(int secs)
ts.tv_nsec = 0;
nanosleep(&ts, NULL);
}
-
-void user_time_init(void)
-{
- set_interval(ITIMER_VIRTUAL);
-}
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
index 90b29ae9af46..1df231a26244 100644
--- a/arch/um/os-Linux/trap.c
+++ b/arch/um/os-Linux/trap.c
@@ -5,7 +5,6 @@
#include <stdlib.h>
#include <signal.h>
-#include <setjmp.h>
#include "kern_util.h"
#include "user_util.h"
#include "os.h"
diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c
index 865f6a6a2590..bbb73a650370 100644
--- a/arch/um/os-Linux/uaccess.c
+++ b/arch/um/os-Linux/uaccess.c
@@ -4,8 +4,7 @@
* Licensed under the GPL
*/
-#include <setjmp.h>
-#include <string.h>
+#include <stddef.h>
#include "longjmp.h"
unsigned long __do_user_copy(void *to, const void *from, int n,
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index c47a2a7ce70e..3f5b1514e8a7 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -7,7 +7,6 @@
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
-#include <setjmp.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/utsname.h>
@@ -107,11 +106,11 @@ int setjmp_wrapper(void (*proc)(void *, void *), ...)
jmp_buf buf;
int n;
- n = sigsetjmp(buf, 1);
+ n = UML_SETJMP(&buf);
if(n == 0){
va_start(args, proc);
(*proc)(&buf, &args);
}
va_end(args);
- return(n);
+ return n;
}
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 374d61a19439..59cc70275754 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -1,5 +1,5 @@
obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
- ptrace_user.o signal.o sigcontext.o syscalls.o sysrq.o \
+ ptrace_user.o setjmp.o signal.o sigcontext.o syscalls.o sysrq.o \
sys_call_table.o tls.o
obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
index 41b0ab2fe830..f1bcd399ac90 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/um/sys-i386/bugs.c
@@ -13,6 +13,7 @@
#include "sysdep/ptrace.h"
#include "task.h"
#include "os.h"
+#include "user_util.h"
#define MAXTOKEN 64
@@ -104,17 +105,17 @@ int cpu_feature(char *what, char *buf, int len)
static int check_cpu_flag(char *feature, int *have_it)
{
char buf[MAXTOKEN], c;
- int fd, len = sizeof(buf)/sizeof(buf[0]);
+ int fd, len = ARRAY_SIZE(buf);
printk("Checking for host processor %s support...", feature);
fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0);
if(fd < 0){
printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd);
- return(0);
+ return 0;
}
*have_it = 0;
- if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0])))
+ if(!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf)))
goto out;
c = token(fd, buf, len - 1, ' ');
@@ -138,7 +139,7 @@ static int check_cpu_flag(char *feature, int *have_it)
if(*have_it == 0) printk("No\n");
else if(*have_it == 1) printk("Yes\n");
os_close_file(fd);
- return(1);
+ return 1;
}
#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index fe0877b3509c..69971b78beaf 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -424,9 +424,8 @@ void ldt_get_host_info(void)
size++;
}
- if(size < sizeof(dummy_list)/sizeof(dummy_list[0])) {
+ if(size < ARRAY_SIZE(dummy_list))
host_ldt_entries = dummy_list;
- }
else {
size = (size + 1) * sizeof(dummy_list[0]);
host_ldt_entries = (short *)kmalloc(size, GFP_KERNEL);
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c
index 40aa88531446..5f3cc6685820 100644
--- a/arch/um/sys-i386/ptrace_user.c
+++ b/arch/um/sys-i386/ptrace_user.c
@@ -15,6 +15,7 @@
#include "user.h"
#include "os.h"
#include "uml-config.h"
+#include "user_util.h"
int ptrace_getregs(long pid, unsigned long *regs_out)
{
@@ -51,7 +52,7 @@ static void write_debugregs(int pid, unsigned long *regs)
int nregs, i;
dummy = NULL;
- nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]);
+ nregs = ARRAY_SIZE(dummy->u_debugreg);
for(i = 0; i < nregs; i++){
if((i == 4) || (i == 5)) continue;
if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i],
@@ -68,7 +69,7 @@ static void read_debugregs(int pid, unsigned long *regs)
int nregs, i;
dummy = NULL;
- nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]);
+ nregs = ARRAY_SIZE(dummy->u_debugreg);
for(i = 0; i < nregs; i++){
regs[i] = ptrace(PTRACE_PEEKUSR, pid,
&dummy->u_debugreg[i], 0);
diff --git a/arch/um/sys-i386/setjmp.S b/arch/um/sys-i386/setjmp.S
new file mode 100644
index 000000000000..b766792c9933
--- /dev/null
+++ b/arch/um/sys-i386/setjmp.S
@@ -0,0 +1,58 @@
+#
+# arch/i386/setjmp.S
+#
+# setjmp/longjmp for the i386 architecture
+#
+
+#
+# The jmp_buf is assumed to contain the following, in order:
+# %ebx
+# %esp
+# %ebp
+# %esi
+# %edi
+# <return address>
+#
+
+ .text
+ .align 4
+ .globl setjmp
+ .type setjmp, @function
+setjmp:
+#ifdef _REGPARM
+ movl %eax,%edx
+#else
+ movl 4(%esp),%edx
+#endif
+ popl %ecx # Return address, and adjust the stack
+ xorl %eax,%eax # Return value
+ movl %ebx,(%edx)
+ movl %esp,4(%edx) # Post-return %esp!
+ pushl %ecx # Make the call/return stack happy
+ movl %ebp,8(%edx)
+ movl %esi,12(%edx)
+ movl %edi,16(%edx)
+ movl %ecx,20(%edx) # Return address
+ ret
+
+ .size setjmp,.-setjmp
+
+ .text
+ .align 4
+ .globl longjmp
+ .type longjmp, @function
+longjmp:
+#ifdef _REGPARM
+ xchgl %eax,%edx
+#else
+ movl 4(%esp),%edx # jmp_ptr address
+ movl 8(%esp),%eax # Return value
+#endif
+ movl (%edx),%ebx
+ movl 4(%edx),%esp
+ movl 8(%edx),%ebp
+ movl 12(%edx),%esi
+ movl 16(%edx),%edi
+ jmp *20(%edx)
+
+ .size longjmp,.-longjmp
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index c19794d435d6..f41768b8e25e 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -5,8 +5,8 @@
#
obj-y = bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
- sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o ksyms.o \
- tls.o
+ setjmp.o sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o \
+ ksyms.o tls.o
obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
obj-$(CONFIG_MODULES) += um_module.o
diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/um/sys-x86_64/setjmp.S
new file mode 100644
index 000000000000..45f547b4043e
--- /dev/null
+++ b/arch/um/sys-x86_64/setjmp.S
@@ -0,0 +1,54 @@
+#
+# arch/x86_64/setjmp.S
+#
+# setjmp/longjmp for the x86-64 architecture
+#
+
+#
+# The jmp_buf is assumed to contain the following, in order:
+# %rbx
+# %rsp (post-return)
+# %rbp
+# %r12
+# %r13
+# %r14
+# %r15
+# <return address>
+#
+
+ .text
+ .align 4
+ .globl setjmp
+ .type setjmp, @function
+setjmp:
+ pop %rsi # Return address, and adjust the stack
+ xorl %eax,%eax # Return value
+ movq %rbx,(%rdi)
+ movq %rsp,8(%rdi) # Post-return %rsp!
+ push %rsi # Make the call/return stack happy
+ movq %rbp,16(%rdi)
+ movq %r12,24(%rdi)
+ movq %r13,32(%rdi)
+ movq %r14,40(%rdi)
+ movq %r15,48(%rdi)
+ movq %rsi,56(%rdi) # Return address
+ ret
+
+ .size setjmp,.-setjmp
+
+ .text
+ .align 4
+ .globl longjmp
+ .type longjmp, @function
+longjmp:
+ movl %esi,%eax # Return value (int)
+ movq (%rdi),%rbx
+ movq 8(%rdi),%rsp
+ movq 16(%rdi),%rbp
+ movq 24(%rdi),%r12
+ movq 32(%rdi),%r13
+ movq 40(%rdi),%r14
+ movq 48(%rdi),%r15
+ jmp *56(%rdi)
+
+ .size longjmp,.-longjmp
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 6cd4878625f1..581ce9af0ec8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
bool
default y
+config ZONE_DMA32
+ bool
+ default y
+
config LOCKDEP_SUPPORT
bool
default y
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index d6d7f731f6f0..708a3cd9a27e 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/kexec.h>
#include <linux/module.h>
+#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -297,6 +298,53 @@ void __init e820_reserve_resources(void)
}
}
+/* Mark pages corresponding to given address range as nosave */
+static void __init
+e820_mark_nosave_range(unsigned long start, unsigned long end)
+{
+ unsigned long pfn, max_pfn;
+
+ if (start >= end)
+ return;
+
+ printk("Nosave address range: %016lx - %016lx\n", start, end);
+ max_pfn = end >> PAGE_SHIFT;
+ for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
+ if (pfn_valid(pfn))
+ SetPageNosave(pfn_to_page(pfn));
+}
+
+/*
+ * Find the ranges of physical addresses that do not correspond to
+ * e820 RAM areas and mark the corresponding pages as nosave for software
+ * suspend and suspend to RAM.
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(void)
+{
+ int i;
+ unsigned long paddr;
+
+ paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
+ for (i = 1; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (paddr < ei->addr)
+ e820_mark_nosave_range(paddr,
+ round_up(ei->addr, PAGE_SIZE));
+
+ paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
+ if (ei->type != E820_RAM)
+ e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
+ paddr);
+
+ if (paddr >= (end_pfn << PAGE_SHIFT))
+ break;
+ }
+}
+
/*
* Add a memory region to the kernel e820 map.
*/
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 34afad704824..4b39f0da17f3 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -689,6 +689,7 @@ void __init setup_arch(char **cmdline_p)
*/
probe_roms();
e820_reserve_resources();
+ e820_mark_nosave_regions();
request_resource(&iomem_resource, &video_ram_resource);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 975380207b46..3ae9ffddddc0 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -46,9 +46,10 @@
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>
-
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
+#include <linux/smp.h>
+
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index 320b6fb00cca..bfbe00763c68 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -54,7 +54,7 @@ ENTRY(restore_image)
movq %rcx, %cr3;
movq %rax, %cr4; # turn PGE back on
- movq pagedir_nosave(%rip), %rdx
+ movq restore_pblist(%rip), %rdx
loop:
testq %rdx, %rdx
jz done
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7a9b18224182..7700e6cd2bd9 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -1148,23 +1148,25 @@ int hpet_rtc_timer_init(void)
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
local_irq_save(flags);
+
cnt = hpet_readl(HPET_COUNTER);
cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
hpet_writel(cnt, HPET_T1_CMP);
hpet_t1_cmp = cnt;
- local_irq_restore(flags);
cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_T1_CFG);
+ local_irq_restore(flags);
+
return 1;
}
static void hpet_rtc_timer_reinit(void)
{
- unsigned int cfg, cnt;
+ unsigned int cfg, cnt, ticks_per_int, lost_ints;
if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
cfg = hpet_readl(HPET_T1_CFG);
@@ -1179,10 +1181,33 @@ static void hpet_rtc_timer_reinit(void)
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
/* It is more accurate to use the comparator value than current count.*/
- cnt = hpet_t1_cmp;
- cnt += hpet_tick*HZ/hpet_rtc_int_freq;
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
+ ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
+ hpet_t1_cmp += ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ /*
+ * If the interrupt handler was delayed too long, the write above tries
+ * to schedule the next interrupt in the past and the hardware would
+ * not interrupt until the counter had wrapped around.
+ * So we have to check that the comparator wasn't set to a past time.
+ */
+ cnt = hpet_readl(HPET_COUNTER);
+ if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
+ lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
+ /* Make sure that, even with the time needed to execute
+ * this code, the next scheduled interrupt has been moved
+ * back to the future: */
+ lost_ints++;
+
+ hpet_t1_cmp += lost_ints * ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ if (PIE_on)
+ PIE_count += lost_ints;
+
+ printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+ hpet_rtc_int_freq);
+ }
}
/*
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index ac8ea66ccb94..4198798e1469 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -299,7 +299,7 @@ static int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
else
- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
/* Below here mismatches are bugs because these lower tables
are shared */
@@ -308,7 +308,7 @@ static int vmalloc_fault(unsigned long address)
pud_ref = pud_offset(pgd_ref, address);
if (pud_none(*pud_ref))
return -1;
- if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
BUG();
pmd = pmd_offset(pud, address);
pmd_ref = pmd_offset(pud_ref, address);
@@ -641,7 +641,7 @@ void vmalloc_sync_all(void)
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
else
- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
spin_unlock(&pgd_lock);
set_bit(pgd_index(address), insync);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index d14fb2dfbfc4..52fd42c40c86 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -536,7 +536,7 @@ int memory_add_physaddr_to_nid(u64 start)
int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(nid);
- struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
+ struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index ab2ecccf7798..ffa111eea9da 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -851,7 +851,7 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev)
* @ap: Port whose timings we are configuring
* @adev: Drive in question
* @udma: udma mode, 0 - 6
- * @is_ich: set if the chip is an ICH device
+ * @isich: set if the chip is an ICH device
*
* Set UDMA mode for device, in host controller PCI config space.
*
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 27c22feebf30..8cd730fe5dd3 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -484,7 +484,7 @@ static void nv_error_handler(struct ata_port *ap)
static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
{
static int printed_version = 0;
- struct ata_port_info *ppi;
+ struct ata_port_info *ppi[2];
struct ata_probe_ent *probe_ent;
int pci_dev_busy = 0;
int rc;
@@ -520,8 +520,8 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
rc = -ENOMEM;
- ppi = &nv_port_info[ent->driver_data];
- probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
+ ppi[0] = ppi[1] = &nv_port_info[ent->driver_data];
+ probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
if (!probe_ent)
goto err_out_regions;
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 9b17375d8056..18d49fff8dc4 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -240,7 +240,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
struct ata_probe_ent *probe_ent = NULL;
int rc;
u32 genctl;
- struct ata_port_info *ppi;
+ struct ata_port_info *ppi[2];
int pci_dev_busy = 0;
u8 pmr;
u8 port2_start;
@@ -265,8 +265,8 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto err_out_regions;
- ppi = &sis_port_info;
- probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
+ ppi[0] = ppi[1] = &sis_port_info;
+ probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
if (!probe_ent) {
rc = -ENOMEM;
goto err_out_regions;
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index 8fc6e800011a..dd76f37be182 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -185,7 +185,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
{
static int printed_version;
struct ata_probe_ent *probe_ent;
- struct ata_port_info *ppi;
+ struct ata_port_info *ppi[2];
int rc;
unsigned int board_idx = (unsigned int) ent->driver_data;
int pci_dev_busy = 0;
@@ -211,8 +211,8 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto err_out_regions;
- ppi = &uli_port_info;
- probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
+ ppi[0] = ppi[1] = &uli_port_info;
+ probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
if (!probe_ent) {
rc = -ENOMEM;
goto err_out_regions;
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 7f087aef99de..a72a2389a11c 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -318,9 +318,10 @@ static void vt6421_init_addrs(struct ata_probe_ent *probe_ent,
static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev)
{
struct ata_probe_ent *probe_ent;
- struct ata_port_info *ppi = &vt6420_port_info;
-
- probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
+ struct ata_port_info *ppi[2];
+
+ ppi[0] = ppi[1] = &vt6420_port_info;
+ probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
if (!probe_ent)
return NULL;
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 41e052fecd7f..f2511b42dba2 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -454,7 +454,7 @@ rate_to_atmf(unsigned rate) /* cps to atm forum format */
return (NONZERO | (exp << 9) | (rate & 0x1ff));
}
-static void __init
+static void __devinit
he_init_rx_lbfp0(struct he_dev *he_dev)
{
unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count;
@@ -485,7 +485,7 @@ he_init_rx_lbfp0(struct he_dev *he_dev)
he_writel(he_dev, he_dev->r0_numbuffs, RLBF0_C);
}
-static void __init
+static void __devinit
he_init_rx_lbfp1(struct he_dev *he_dev)
{
unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count;
@@ -516,7 +516,7 @@ he_init_rx_lbfp1(struct he_dev *he_dev)
he_writel(he_dev, he_dev->r1_numbuffs, RLBF1_C);
}
-static void __init
+static void __devinit
he_init_tx_lbfp(struct he_dev *he_dev)
{
unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count;
@@ -546,7 +546,7 @@ he_init_tx_lbfp(struct he_dev *he_dev)
he_writel(he_dev, lbufd_index - 1, TLBF_T);
}
-static int __init
+static int __devinit
he_init_tpdrq(struct he_dev *he_dev)
{
he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev,
@@ -568,7 +568,7 @@ he_init_tpdrq(struct he_dev *he_dev)
return 0;
}
-static void __init
+static void __devinit
he_init_cs_block(struct he_dev *he_dev)
{
unsigned clock, rate, delta;
@@ -664,7 +664,7 @@ he_init_cs_block(struct he_dev *he_dev)
}
-static int __init
+static int __devinit
he_init_cs_block_rcm(struct he_dev *he_dev)
{
unsigned (*rategrid)[16][16];
@@ -785,7 +785,7 @@ he_init_cs_block_rcm(struct he_dev *he_dev)
return 0;
}
-static int __init
+static int __devinit
he_init_group(struct he_dev *he_dev, int group)
{
int i;
@@ -955,7 +955,7 @@ he_init_group(struct he_dev *he_dev, int group)
return 0;
}
-static int __init
+static int __devinit
he_init_irq(struct he_dev *he_dev)
{
int i;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index e9b0957f15d1..001e6f6b9c1b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -54,10 +54,12 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
"Node %d MemUsed: %8lu kB\n"
"Node %d Active: %8lu kB\n"
"Node %d Inactive: %8lu kB\n"
+#ifdef CONFIG_HIGHMEM
"Node %d HighTotal: %8lu kB\n"
"Node %d HighFree: %8lu kB\n"
"Node %d LowTotal: %8lu kB\n"
"Node %d LowFree: %8lu kB\n"
+#endif
"Node %d Dirty: %8lu kB\n"
"Node %d Writeback: %8lu kB\n"
"Node %d FilePages: %8lu kB\n"
@@ -66,16 +68,20 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
- "Node %d Slab: %8lu kB\n",
+ "Node %d Slab: %8lu kB\n"
+ "Node %d SReclaimable: %8lu kB\n"
+ "Node %d SUnreclaim: %8lu kB\n",
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
nid, K(active),
nid, K(inactive),
+#ifdef CONFIG_HIGHMEM
nid, K(i.totalhigh),
nid, K(i.freehigh),
nid, K(i.totalram - i.totalhigh),
nid, K(i.freeram - i.freehigh),
+#endif
nid, K(node_page_state(nid, NR_FILE_DIRTY)),
nid, K(node_page_state(nid, NR_WRITEBACK)),
nid, K(node_page_state(nid, NR_FILE_PAGES)),
@@ -84,7 +90,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
nid, K(node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
nid, K(node_page_state(nid, NR_BOUNCE)),
- nid, K(node_page_state(nid, NR_SLAB)));
+ nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
+ node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
+ nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
+ nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
n += hugetlb_report_node_meminfo(nid, buf + n);
return n;
}
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 6e6a7c7a7eff..ab6429b4a84e 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -209,11 +209,12 @@ static const unsigned char days_in_mo[] =
*/
static inline unsigned char rtc_is_updating(void)
{
+ unsigned long flags;
unsigned char uip;
- spin_lock_irq(&rtc_lock);
+ spin_lock_irqsave(&rtc_lock, flags);
uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return uip;
}
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 71f27e955d87..c7854ea57b52 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -476,13 +476,13 @@ static int auide_dma_lostirq(ide_drive_t *drive)
return 0;
}
-static void auide_ddma_tx_callback(int irq, void *param, struct pt_regs *regs)
+static void auide_ddma_tx_callback(int irq, void *param)
{
_auide_hwif *ahwif = (_auide_hwif*)param;
ahwif->drive->waiting_for_dma = 0;
}
-static void auide_ddma_rx_callback(int irq, void *param, struct pt_regs *regs)
+static void auide_ddma_rx_callback(int irq, void *param)
{
_auide_hwif *ahwif = (_auide_hwif*)param;
ahwif->drive->waiting_for_dma = 0;
diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c
index 88bf2af2a0e7..edd7b83c3464 100644
--- a/drivers/media/video/videodev.c
+++ b/drivers/media/video/videodev.c
@@ -836,7 +836,7 @@ static int __video_do_ioctl(struct inode *inode, struct file *file,
break;
}
- if (index<=0 || index >= vfd->tvnormsize) {
+ if (index < 0 || index >= vfd->tvnormsize) {
ret=-EINVAL;
break;
}
diff --git a/drivers/mmc/au1xmmc.c b/drivers/mmc/au1xmmc.c
index fb606165af3b..61268da13957 100644
--- a/drivers/mmc/au1xmmc.c
+++ b/drivers/mmc/au1xmmc.c
@@ -731,7 +731,7 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios)
}
}
-static void au1xmmc_dma_callback(int irq, void *dev_id, struct pt_regs *regs)
+static void au1xmmc_dma_callback(int irq, void *dev_id)
{
struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id;
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index 77670741e101..feb42db10ee1 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1323,9 +1323,9 @@ static const struct ethtool_ops sparc_lance_ethtool_ops = {
.get_link = sparc_lance_get_link,
};
-static int __init sparc_lance_probe_one(struct sbus_dev *sdev,
- struct sbus_dma *ledma,
- struct sbus_dev *lebuffer)
+static int __devinit sparc_lance_probe_one(struct sbus_dev *sdev,
+ struct sbus_dma *ledma,
+ struct sbus_dev *lebuffer)
{
static unsigned version_printed;
struct net_device *dev;
@@ -1515,7 +1515,7 @@ fail:
}
/* On 4m, find the associated dma for the lance chip */
-static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev)
+static struct sbus_dma * __devinit find_ledma(struct sbus_dev *sdev)
{
struct sbus_dma *p;
@@ -1533,7 +1533,7 @@ static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev)
/* Find all the lance cards on the system and initialize them */
static struct sbus_dev sun4_sdev;
-static int __init sparc_lance_init(void)
+static int __devinit sparc_lance_init(void)
{
if ((idprom->id_machtype == (SM_SUN4|SM_4_330)) ||
(idprom->id_machtype == (SM_SUN4|SM_4_470))) {
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 372e47f7d596..5f7ba1adb309 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1929,6 +1929,13 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
mutex_lock(&state->mutex);
+#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND
+ if (uart_console(port)) {
+ mutex_unlock(&state->mutex);
+ return 0;
+ }
+#endif
+
if (state->info && state->info->flags & UIF_INITIALIZED) {
const struct uart_ops *ops = port->ops;
@@ -1967,6 +1974,13 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
mutex_lock(&state->mutex);
+#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND
+ if (uart_console(port)) {
+ mutex_unlock(&state->mutex);
+ return 0;
+ }
+#endif
+
uart_change_pm(state, 0);
/*
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c
index 4f78f234473d..c151dcf68786 100644
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -397,6 +397,12 @@ static ssize_t store_bl_curve(struct class_device *class_device,
u8 tmp_curve[FB_BACKLIGHT_LEVELS];
unsigned int i;
+ /* Some drivers don't use framebuffer_alloc(), but those also
+ * don't have backlights.
+ */
+ if (!fb_info || !fb_info->bl_dev)
+ return -ENODEV;
+
if (count != (FB_BACKLIGHT_LEVELS / 8 * 24))
return -EINVAL;
@@ -430,6 +436,12 @@ static ssize_t show_bl_curve(struct class_device *class_device, char *buf)
ssize_t len = 0;
unsigned int i;
+ /* Some drivers don't use framebuffer_alloc(), but those also
+ * don't have backlights.
+ */
+ if (!fb_info || !fb_info->bl_dev)
+ return -ENODEV;
+
mutex_lock(&fb_info->bl_mutex);
for (i = 0; i < FB_BACKLIGHT_LEVELS; i += 8)
len += snprintf(&buf[len], PAGE_SIZE,
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8dbd44f10e9d..d96e5c14a9ca 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -32,7 +32,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
if (!do_now) {
/* Too young to die */
- if (time_after(ino->last_used + timeout, now))
+ if (!timeout || time_after(ino->last_used + timeout, now))
return 0;
/* update last_used here :-
@@ -253,7 +253,7 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
struct dentry *root = dget(sb->s_root);
int do_now = how & AUTOFS_EXP_IMMEDIATE;
- if (!sbi->exp_timeout || !root)
+ if (!root)
return NULL;
now = jiffies;
@@ -293,7 +293,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
int do_now = how & AUTOFS_EXP_IMMEDIATE;
int exp_leaves = how & AUTOFS_EXP_LEAVES;
- if ( !sbi->exp_timeout || !root )
+ if (!root)
return NULL;
now = jiffies;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 672a3b90bc55..64802aabd1ac 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1262,7 +1262,7 @@ static void fill_elf_header(struct elfhdr *elf, int segs)
return;
}
-static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
+static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
phdr->p_type = PT_NOTE;
phdr->p_offset = offset;
@@ -1428,7 +1428,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
int i;
struct vm_area_struct *vma;
struct elfhdr *elf = NULL;
- off_t offset = 0, dataoff;
+ loff_t offset = 0, dataoff;
unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
int numnote;
struct memelfnote *notes = NULL;
@@ -1661,11 +1661,11 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
ELF_CORE_WRITE_EXTRA_DATA;
#endif
- if ((off_t)file->f_pos != offset) {
+ if (file->f_pos != offset) {
/* Sanity check */
printk(KERN_WARNING
- "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
- (off_t)file->f_pos, offset);
+ "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n",
+ file->f_pos, offset);
}
end_coredump:
diff --git a/fs/buffer.c b/fs/buffer.c
index 71649ef9b658..3b6d701073e7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page)
spin_lock(&mapping->private_lock);
ret = drop_buffers(page, &buffers_to_free);
+ spin_unlock(&mapping->private_lock);
if (ret) {
/*
* If the filesystem writes its buffers by hand (eg ext3)
@@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page)
*/
clear_page_dirty(page);
}
- spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 42da60784311..32a8caf0c41e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal,
return (ret == -EIO);
}
+static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+{
+ int i;
+
+ for (i = 0; i < bufs; i++) {
+ wbuf[i]->b_end_io = end_buffer_write_sync;
+ /* We use-up our safety reference in submit_bh() */
+ submit_bh(WRITE, wbuf[i]);
+ }
+}
+
+/*
+ * Submit all the data buffers to disk
+ */
+static void journal_submit_data_buffers(journal_t *journal,
+ transaction_t *commit_transaction)
+{
+ struct journal_head *jh;
+ struct buffer_head *bh;
+ int locked;
+ int bufs = 0;
+ struct buffer_head **wbuf = journal->j_wbuf;
+
+ /*
+ * Whenever we unlock the journal and sleep, things can get added
+ * onto ->t_sync_datalist, so we have to keep looping back to
+ * write_out_data until we *know* that the list is empty.
+ *
+ * Cleanup any flushed data buffers from the data list. Even in
+ * abort mode, we want to flush this out as soon as possible.
+ */
+write_out_data:
+ cond_resched();
+ spin_lock(&journal->j_list_lock);
+
+ while (commit_transaction->t_sync_datalist) {
+ jh = commit_transaction->t_sync_datalist;
+ bh = jh2bh(jh);
+ locked = 0;
+
+ /* Get reference just to make sure buffer does not disappear
+ * when we are forced to drop various locks */
+ get_bh(bh);
+ /* If the buffer is dirty, we need to submit IO and hence
+ * we need the buffer lock. We try to lock the buffer without
+ * blocking. If we fail, we need to drop j_list_lock and do
+ * blocking lock_buffer().
+ */
+ if (buffer_dirty(bh)) {
+ if (test_set_buffer_locked(bh)) {
+ BUFFER_TRACE(bh, "needs blocking lock");
+ spin_unlock(&journal->j_list_lock);
+ /* Write out all data to prevent deadlocks */
+ journal_do_submit_data(wbuf, bufs);
+ bufs = 0;
+ lock_buffer(bh);
+ spin_lock(&journal->j_list_lock);
+ }
+ locked = 1;
+ }
+ /* We have to get bh_state lock. Again out of order, sigh. */
+ if (!inverted_lock(journal, bh)) {
+ jbd_lock_bh_state(bh);
+ spin_lock(&journal->j_list_lock);
+ }
+ /* Someone already cleaned up the buffer? */
+ if (!buffer_jbd(bh)
+ || jh->b_transaction != commit_transaction
+ || jh->b_jlist != BJ_SyncData) {
+ jbd_unlock_bh_state(bh);
+ if (locked)
+ unlock_buffer(bh);
+ BUFFER_TRACE(bh, "already cleaned up");
+ put_bh(bh);
+ continue;
+ }
+ if (locked && test_clear_buffer_dirty(bh)) {
+ BUFFER_TRACE(bh, "needs writeout, adding to array");
+ wbuf[bufs++] = bh;
+ __journal_file_buffer(jh, commit_transaction,
+ BJ_Locked);
+ jbd_unlock_bh_state(bh);
+ if (bufs == journal->j_wbufsize) {
+ spin_unlock(&journal->j_list_lock);
+ journal_do_submit_data(wbuf, bufs);
+ bufs = 0;
+ goto write_out_data;
+ }
+ }
+ else {
+ BUFFER_TRACE(bh, "writeout complete: unfile");
+ __journal_unfile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ if (locked)
+ unlock_buffer(bh);
+ journal_remove_journal_head(bh);
+ /* Once for our safety reference, once for
+ * journal_remove_journal_head() */
+ put_bh(bh);
+ put_bh(bh);
+ }
+
+ if (lock_need_resched(&journal->j_list_lock)) {
+ spin_unlock(&journal->j_list_lock);
+ goto write_out_data;
+ }
+ }
+ spin_unlock(&journal->j_list_lock);
+ journal_do_submit_data(wbuf, bufs);
+}
+
/*
* journal_commit_transaction
*
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal)
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
-
err = 0;
- /*
- * Whenever we unlock the journal and sleep, things can get added
- * onto ->t_sync_datalist, so we have to keep looping back to
- * write_out_data until we *know* that the list is empty.
- */
- bufs = 0;
- /*
- * Cleanup any flushed data buffers from the data list. Even in
- * abort mode, we want to flush this out as soon as possible.
- */
-write_out_data:
- cond_resched();
- spin_lock(&journal->j_list_lock);
-
- while (commit_transaction->t_sync_datalist) {
- struct buffer_head *bh;
-
- jh = commit_transaction->t_sync_datalist;
- commit_transaction->t_sync_datalist = jh->b_tnext;
- bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- BUFFER_TRACE(bh, "locked");
- if (!inverted_lock(journal, bh))
- goto write_out_data;
- __journal_temp_unlink_buffer(jh);
- __journal_file_buffer(jh, commit_transaction,
- BJ_Locked);
- jbd_unlock_bh_state(bh);
- if (lock_need_resched(&journal->j_list_lock)) {
- spin_unlock(&journal->j_list_lock);
- goto write_out_data;
- }
- } else {
- if (buffer_dirty(bh)) {
- BUFFER_TRACE(bh, "start journal writeout");
- get_bh(bh);
- wbuf[bufs++] = bh;
- if (bufs == journal->j_wbufsize) {
- jbd_debug(2, "submit %d writes\n",
- bufs);
- spin_unlock(&journal->j_list_lock);
- ll_rw_block(SWRITE, bufs, wbuf);
- journal_brelse_array(wbuf, bufs);
- bufs = 0;
- goto write_out_data;
- }
- } else {
- BUFFER_TRACE(bh, "writeout complete: unfile");
- if (!inverted_lock(journal, bh))
- goto write_out_data;
- __journal_unfile_buffer(jh);
- jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
- put_bh(bh);
- if (lock_need_resched(&journal->j_list_lock)) {
- spin_unlock(&journal->j_list_lock);
- goto write_out_data;
- }
- }
- }
- }
-
- if (bufs) {
- spin_unlock(&journal->j_list_lock);
- ll_rw_block(SWRITE, bufs, wbuf);
- journal_brelse_array(wbuf, bufs);
- spin_lock(&journal->j_list_lock);
- }
+ journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
*/
+ spin_lock(&journal->j_list_lock);
while (commit_transaction->t_locked_list) {
struct buffer_head *bh;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 942156225447..5bbd60896050 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -157,10 +157,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"SwapCached: %8lu kB\n"
"Active: %8lu kB\n"
"Inactive: %8lu kB\n"
+#ifdef CONFIG_HIGHMEM
"HighTotal: %8lu kB\n"
"HighFree: %8lu kB\n"
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
+#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
@@ -168,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"AnonPages: %8lu kB\n"
"Mapped: %8lu kB\n"
"Slab: %8lu kB\n"
+ "SReclaimable: %8lu kB\n"
+ "SUnreclaim: %8lu kB\n"
"PageTables: %8lu kB\n"
"NFS_Unstable: %8lu kB\n"
"Bounce: %8lu kB\n"
@@ -183,17 +187,22 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(total_swapcache_pages),
K(active),
K(inactive),
+#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
K(i.freehigh),
K(i.totalram-i.totalhigh),
K(i.freeram-i.freehigh),
+#endif
K(i.totalswap),
K(i.freeswap),
K(global_page_state(NR_FILE_DIRTY)),
K(global_page_state(NR_WRITEBACK)),
K(global_page_state(NR_ANON_PAGES)),
K(global_page_state(NR_FILE_MAPPED)),
- K(global_page_state(NR_SLAB)),
+ K(global_page_state(NR_SLAB_RECLAIMABLE) +
+ global_page_state(NR_SLAB_UNRECLAIMABLE)),
+ K(global_page_state(NR_SLAB_RECLAIMABLE)),
+ K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_PAGETABLE)),
K(global_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h
index 64d0ab98fcd8..8af56ce346ad 100644
--- a/include/asm-alpha/mmzone.h
+++ b/include/asm-alpha/mmzone.h
@@ -75,6 +75,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32))
+#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32))
#define pte_pfn(pte) (pte_val(pte) >> 32)
#define mk_pte(page, pgprot) \
diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h
index 93eaa58b7961..49ac9bee7ced 100644
--- a/include/asm-alpha/pgtable.h
+++ b/include/asm-alpha/pgtable.h
@@ -230,16 +230,17 @@ extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
extern inline unsigned long
-pmd_page_kernel(pmd_t pmd)
+pmd_page_vaddr(pmd_t pmd)
{
return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET;
}
#ifndef CONFIG_DISCONTIGMEM
#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32))
+#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32))
#endif
-extern inline unsigned long pgd_page(pgd_t pgd)
+extern inline unsigned long pgd_page_vaddr(pgd_t pgd)
{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
@@ -293,13 +294,13 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu
/* Find an entry in the second-level page table.. */
extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
{
- return (pmd_t *) pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
+ return (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
}
/* Find an entry in the third-level page table.. */
extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
{
- return (pte_t *) pmd_page_kernel(*dir)
+ return (pte_t *) pmd_page_vaddr(*dir)
+ ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1));
}
diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h
index 8d3919c6458c..4d10d319fa34 100644
--- a/include/asm-arm/pgtable.h
+++ b/include/asm-arm/pgtable.h
@@ -224,9 +224,9 @@ extern struct page *empty_zero_page;
#define pte_none(pte) (!pte_val(pte))
#define pte_clear(mm,addr,ptep) set_pte_at((mm),(addr),(ptep), __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
-#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr))
-#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr))
-#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
+#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
+#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
@@ -291,7 +291,7 @@ PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG);
clean_pmd_entry(pmdp); \
} while (0)
-static inline pte_t *pmd_page_kernel(pmd_t pmd)
+static inline pte_t *pmd_page_vaddr(pmd_t pmd)
{
unsigned long ptr;
diff --git a/include/asm-arm26/pgtable.h b/include/asm-arm26/pgtable.h
index 19ac9101a6bb..63a8881fae13 100644
--- a/include/asm-arm26/pgtable.h
+++ b/include/asm-arm26/pgtable.h
@@ -186,12 +186,12 @@ extern struct page *empty_zero_page;
* return a pointer to memory (no special alignment)
*/
#define pmd_page(pmd) ((struct page *)(pmd_val((pmd)) & ~_PMD_PRESENT))
-#define pmd_page_kernel(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT))
+#define pmd_page_vaddr(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT))
-#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
-#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr))
-#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
+#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
diff --git a/include/asm-avr32/Kbuild b/include/asm-avr32/Kbuild
new file mode 100644
index 000000000000..8770e73ce938
--- /dev/null
+++ b/include/asm-avr32/Kbuild
@@ -0,0 +1,3 @@
+include include/asm-generic/Kbuild.asm
+
+headers-y += cachectl.h
diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h
new file mode 100644
index 000000000000..50bf6e31a143
--- /dev/null
+++ b/include/asm-avr32/a.out.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_AVR32_A_OUT_H
+#define __ASM_AVR32_A_OUT_H
+
+struct exec
+{
+ unsigned long a_info; /* Use macros N_MAGIC, etc for access */
+ unsigned a_text; /* length of text, in bytes */
+ unsigned a_data; /* length of data, in bytes */
+ unsigned a_bss; /* length of uninitialized data area for file, in bytes */
+ unsigned a_syms; /* length of symbol table data in file, in bytes */
+ unsigned a_entry; /* start address */
+ unsigned a_trsize; /* length of relocation info for text, in bytes */
+ unsigned a_drsize; /* length of relocation info for data, in bytes */
+};
+
+#define N_TRSIZE(a) ((a).a_trsize)
+#define N_DRSIZE(a) ((a).a_drsize)
+#define N_SYMSIZE(a) ((a).a_syms)
+
+#ifdef __KERNEL__
+
+#define STACK_TOP TASK_SIZE
+
+#endif
+
+#endif /* __ASM_AVR32_A_OUT_H */
diff --git a/include/asm-avr32/addrspace.h b/include/asm-avr32/addrspace.h
new file mode 100644
index 000000000000..366794858ec7
--- /dev/null
+++ b/include/asm-avr32/addrspace.h
@@ -0,0 +1,43 @@
+/*
+ * Definitions for the address spaces of the AVR32 CPUs. Heavily based on
+ * include/asm-sh/addrspace.h
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_ADDRSPACE_H
+#define __ASM_AVR32_ADDRSPACE_H
+
+#ifdef CONFIG_MMU
+
+/* Memory segments when segmentation is enabled */
+#define P0SEG 0x00000000
+#define P1SEG 0x80000000
+#define P2SEG 0xa0000000
+#define P3SEG 0xc0000000
+#define P4SEG 0xe0000000
+
+/* Returns the privileged segment base of a given address */
+#define PXSEG(a) (((unsigned long)(a)) & 0xe0000000)
+
+/* Returns the physical address of a PnSEG (n=1,2) address */
+#define PHYSADDR(a) (((unsigned long)(a)) & 0x1fffffff)
+
+/*
+ * Map an address to a certain privileged segment
+ */
+#define P1SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+ | P1SEG))
+#define P2SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+ | P2SEG))
+#define P3SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+ | P3SEG))
+#define P4SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+ | P4SEG))
+
+#endif /* CONFIG_MMU */
+
+#endif /* __ASM_AVR32_ADDRSPACE_H */
diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
new file mode 100644
index 000000000000..ce1150d4438d
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
@@ -0,0 +1,36 @@
+/*
+ * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h
+ *
+ * Copyright (C) 2005 Ivan Kokshaysky
+ * Copyright (C) SAN People
+ *
+ * Peripheral Data Controller (PDC) registers.
+ * Based on AT91RM9200 datasheet revision E.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef AT91RM9200_PDC_H
+#define AT91RM9200_PDC_H
+
+#define AT91_PDC_RPR 0x100 /* Receive Pointer Register */
+#define AT91_PDC_RCR 0x104 /* Receive Counter Register */
+#define AT91_PDC_TPR 0x108 /* Transmit Pointer Register */
+#define AT91_PDC_TCR 0x10c /* Transmit Counter Register */
+#define AT91_PDC_RNPR 0x110 /* Receive Next Pointer Register */
+#define AT91_PDC_RNCR 0x114 /* Receive Next Counter Register */
+#define AT91_PDC_TNPR 0x118 /* Transmit Next Pointer Register */
+#define AT91_PDC_TNCR 0x11c /* Transmit Next Counter Register */
+
+#define AT91_PDC_PTCR 0x120 /* Transfer Control Register */
+#define AT91_PDC_RXTEN (1 << 0) /* Receiver Transfer Enable */
+#define AT91_PDC_RXTDIS (1 << 1) /* Receiver Transfer Disable */
+#define AT91_PDC_TXTEN (1 << 8) /* Transmitter Transfer Enable */
+#define AT91_PDC_TXTDIS (1 << 9) /* Transmitter Transfer Disable */
+
+#define AT91_PDC_PTSR 0x124 /* Transfer Status Register */
+
+#endif
diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_usart.h b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h
new file mode 100644
index 000000000000..79f851e31b9c
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h
@@ -0,0 +1,123 @@
+/*
+ * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h
+ *
+ * Copyright (C) 2005 Ivan Kokshaysky
+ * Copyright (C) SAN People
+ *
+ * USART registers.
+ * Based on AT91RM9200 datasheet revision E.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef AT91RM9200_USART_H
+#define AT91RM9200_USART_H
+
+#define AT91_US_CR 0x00 /* Control Register */
+#define AT91_US_RSTRX (1 << 2) /* Reset Receiver */
+#define AT91_US_RSTTX (1 << 3) /* Reset Transmitter */
+#define AT91_US_RXEN (1 << 4) /* Receiver Enable */
+#define AT91_US_RXDIS (1 << 5) /* Receiver Disable */
+#define AT91_US_TXEN (1 << 6) /* Transmitter Enable */
+#define AT91_US_TXDIS (1 << 7) /* Transmitter Disable */
+#define AT91_US_RSTSTA (1 << 8) /* Reset Status Bits */
+#define AT91_US_STTBRK (1 << 9) /* Start Break */
+#define AT91_US_STPBRK (1 << 10) /* Stop Break */
+#define AT91_US_STTTO (1 << 11) /* Start Time-out */
+#define AT91_US_SENDA (1 << 12) /* Send Address */
+#define AT91_US_RSTIT (1 << 13) /* Reset Iterations */
+#define AT91_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */
+#define AT91_US_RETTO (1 << 15) /* Rearm Time-out */
+#define AT91_US_DTREN (1 << 16) /* Data Terminal Ready Enable */
+#define AT91_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable */
+#define AT91_US_RTSEN (1 << 18) /* Request To Send Enable */
+#define AT91_US_RTSDIS (1 << 19) /* Request To Send Disable */
+
+#define AT91_US_MR 0x04 /* Mode Register */
+#define AT91_US_USMODE (0xf << 0) /* Mode of the USART */
+#define AT91_US_USMODE_NORMAL 0
+#define AT91_US_USMODE_RS485 1
+#define AT91_US_USMODE_HWHS 2
+#define AT91_US_USMODE_MODEM 3
+#define AT91_US_USMODE_ISO7816_T0 4
+#define AT91_US_USMODE_ISO7816_T1 6
+#define AT91_US_USMODE_IRDA 8
+#define AT91_US_USCLKS (3 << 4) /* Clock Selection */
+#define AT91_US_CHRL (3 << 6) /* Character Length */
+#define AT91_US_CHRL_5 (0 << 6)
+#define AT91_US_CHRL_6 (1 << 6)
+#define AT91_US_CHRL_7 (2 << 6)
+#define AT91_US_CHRL_8 (3 << 6)
+#define AT91_US_SYNC (1 << 8) /* Synchronous Mode Select */
+#define AT91_US_PAR (7 << 9) /* Parity Type */
+#define AT91_US_PAR_EVEN (0 << 9)
+#define AT91_US_PAR_ODD (1 << 9)
+#define AT91_US_PAR_SPACE (2 << 9)
+#define AT91_US_PAR_MARK (3 << 9)
+#define AT91_US_PAR_NONE (4 << 9)
+#define AT91_US_PAR_MULTI_DROP (6 << 9)
+#define AT91_US_NBSTOP (3 << 12) /* Number of Stop Bits */
+#define AT91_US_NBSTOP_1 (0 << 12)
+#define AT91_US_NBSTOP_1_5 (1 << 12)
+#define AT91_US_NBSTOP_2 (2 << 12)
+#define AT91_US_CHMODE (3 << 14) /* Channel Mode */
+#define AT91_US_CHMODE_NORMAL (0 << 14)
+#define AT91_US_CHMODE_ECHO (1 << 14)
+#define AT91_US_CHMODE_LOC_LOOP (2 << 14)
+#define AT91_US_CHMODE_REM_LOOP (3 << 14)
+#define AT91_US_MSBF (1 << 16) /* Bit Order */
+#define AT91_US_MODE9 (1 << 17) /* 9-bit Character Length */
+#define AT91_US_CLKO (1 << 18) /* Clock Output Select */
+#define AT91_US_OVER (1 << 19) /* Oversampling Mode */
+#define AT91_US_INACK (1 << 20) /* Inhibit Non Acknowledge */
+#define AT91_US_DSNACK (1 << 21) /* Disable Successive NACK */
+#define AT91_US_MAX_ITER (7 << 24) /* Max Iterations */
+#define AT91_US_FILTER (1 << 28) /* Infrared Receive Line Filter */
+
+#define AT91_US_IER 0x08 /* Interrupt Enable Register */
+#define AT91_US_RXRDY (1 << 0) /* Receiver Ready */
+#define AT91_US_TXRDY (1 << 1) /* Transmitter Ready */
+#define AT91_US_RXBRK (1 << 2) /* Break Received / End of Break */
+#define AT91_US_ENDRX (1 << 3) /* End of Receiver Transfer */
+#define AT91_US_ENDTX (1 << 4) /* End of Transmitter Transfer */
+#define AT91_US_OVRE (1 << 5) /* Overrun Error */
+#define AT91_US_FRAME (1 << 6) /* Framing Error */
+#define AT91_US_PARE (1 << 7) /* Parity Error */
+#define AT91_US_TIMEOUT (1 << 8) /* Receiver Time-out */
+#define AT91_US_TXEMPTY (1 << 9) /* Transmitter Empty */
+#define AT91_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */
+#define AT91_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */
+#define AT91_US_RXBUFF (1 << 12) /* Reception Buffer Full */
+#define AT91_US_NACK (1 << 13) /* Non Acknowledge */
+#define AT91_US_RIIC (1 << 16) /* Ring Indicator Input Change */
+#define AT91_US_DSRIC (1 << 17) /* Data Set Ready Input Change */
+#define AT91_US_DCDIC (1 << 18) /* Data Carrier Detect Input Change */
+#define AT91_US_CTSIC (1 << 19) /* Clear to Send Input Change */
+#define AT91_US_RI (1 << 20) /* RI */
+#define AT91_US_DSR (1 << 21) /* DSR */
+#define AT91_US_DCD (1 << 22) /* DCD */
+#define AT91_US_CTS (1 << 23) /* CTS */
+
+#define AT91_US_IDR 0x0c /* Interrupt Disable Register */
+#define AT91_US_IMR 0x10 /* Interrupt Mask Register */
+#define AT91_US_CSR 0x14 /* Channel Status Register */
+#define AT91_US_RHR 0x18 /* Receiver Holding Register */
+#define AT91_US_THR 0x1c /* Transmitter Holding Register */
+
+#define AT91_US_BRGR 0x20 /* Baud Rate Generator Register */
+#define AT91_US_CD (0xffff << 0) /* Clock Divider */
+
+#define AT91_US_RTOR 0x24 /* Receiver Time-out Register */
+#define AT91_US_TO (0xffff << 0) /* Time-out Value */
+
+#define AT91_US_TTGR 0x28 /* Transmitter Timeguard Register */
+#define AT91_US_TG (0xff << 0) /* Timeguard Value */
+
+#define AT91_US_FIDI 0x40 /* FI DI Ratio Register */
+#define AT91_US_NER 0x44 /* Number of Errors Register */
+#define AT91_US_IF 0x4c /* IrDA Filter Register */
+
+#endif
diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h
new file mode 100644
index 000000000000..39368e18ab20
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/board.h
@@ -0,0 +1,35 @@
+/*
+ * Platform data definitions.
+ */
+#ifndef __ASM_ARCH_BOARD_H
+#define __ASM_ARCH_BOARD_H
+
+#include <linux/types.h>
+
+/* Add basic devices: system manager, interrupt controller, portmuxes, etc. */
+void at32_add_system_devices(void);
+
+#define AT91_NR_UART 4
+extern struct platform_device *at91_default_console_device;
+
+struct platform_device *at32_add_device_usart(unsigned int id);
+
+struct eth_platform_data {
+ u8 valid;
+ u8 mii_phy_addr;
+ u8 is_rmii;
+ u8 hw_addr[6];
+};
+struct platform_device *
+at32_add_device_eth(unsigned int id, struct eth_platform_data *data);
+
+struct platform_device *at32_add_device_spi(unsigned int id);
+
+struct lcdc_platform_data {
+ unsigned long fbmem_start;
+ unsigned long fbmem_size;
+};
+struct platform_device *
+at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data);
+
+#endif /* __ASM_ARCH_BOARD_H */
diff --git a/include/asm-avr32/arch-at32ap/init.h b/include/asm-avr32/arch-at32ap/init.h
new file mode 100644
index 000000000000..43722634e069
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/init.h
@@ -0,0 +1,21 @@
+/*
+ * AT32AP platform initialization calls.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_AT32AP_INIT_H__
+#define __ASM_AVR32_AT32AP_INIT_H__
+
+void setup_platform(void);
+
+/* Called by setup_platform */
+void at32_clock_init(void);
+void at32_portmux_init(void);
+
+void at32_setup_serial_console(unsigned int usart_id);
+
+#endif /* __ASM_AVR32_AT32AP_INIT_H__ */
diff --git a/include/asm-avr32/arch-at32ap/portmux.h b/include/asm-avr32/arch-at32ap/portmux.h
new file mode 100644
index 000000000000..4d50421262a1
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/portmux.h
@@ -0,0 +1,16 @@
+/*
+ * AT32 portmux interface.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_AT32_PORTMUX_H__
+#define __ASM_AVR32_AT32_PORTMUX_H__
+
+void portmux_set_func(unsigned int portmux_id, unsigned int pin_id,
+ unsigned int function_id);
+
+#endif /* __ASM_AVR32_AT32_PORTMUX_H__ */
diff --git a/include/asm-avr32/arch-at32ap/sm.h b/include/asm-avr32/arch-at32ap/sm.h
new file mode 100644
index 000000000000..265a9ead20bf
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/sm.h
@@ -0,0 +1,27 @@
+/*
+ * AT32 System Manager interface.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_AT32_SM_H__
+#define __ASM_AVR32_AT32_SM_H__
+
+struct irq_chip;
+struct platform_device;
+
+struct at32_sm {
+ spinlock_t lock;
+ void __iomem *regs;
+ struct irq_chip *eim_chip;
+ unsigned int eim_first_irq;
+ struct platform_device *pdev;
+};
+
+extern struct platform_device at32_sm_device;
+extern struct at32_sm system_manager;
+
+#endif /* __ASM_AVR32_AT32_SM_H__ */
diff --git a/include/asm-avr32/arch-at32ap/smc.h b/include/asm-avr32/arch-at32ap/smc.h
new file mode 100644
index 000000000000..3732b328303d
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/smc.h
@@ -0,0 +1,60 @@
+/*
+ * Static Memory Controller for AT32 chips
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Inspired by the OMAP2 General-Purpose Memory Controller interface
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ARCH_AT32AP_SMC_H
+#define __ARCH_AT32AP_SMC_H
+
+/*
+ * All timing parameters are in nanoseconds.
+ */
+struct smc_config {
+ /* Delay from address valid to assertion of given strobe */
+ u16 ncs_read_setup;
+ u16 nrd_setup;
+ u16 ncs_write_setup;
+ u16 nwe_setup;
+
+ /* Pulse length of given strobe */
+ u16 ncs_read_pulse;
+ u16 nrd_pulse;
+ u16 ncs_write_pulse;
+ u16 nwe_pulse;
+
+ /* Total cycle length of given operation */
+ u16 read_cycle;
+ u16 write_cycle;
+
+ /* Bus width in bytes */
+ u8 bus_width;
+
+ /*
+ * 0: Data is sampled on rising edge of NCS
+ * 1: Data is sampled on rising edge of NRD
+ */
+ unsigned int nrd_controlled:1;
+
+ /*
+ * 0: Data is driven on falling edge of NCS
+ * 1: Data is driven on falling edge of NWE
+ */
+ unsigned int nwe_controlled:1;
+
+ /*
+ * 0: Byte select access type
+ * 1: Byte write access type
+ */
+ unsigned int byte_write:1;
+};
+
+extern int smc_set_configuration(int cs, const struct smc_config *config);
+extern struct smc_config *smc_get_configuration(int cs);
+
+#endif /* __ARCH_AT32AP_SMC_H */
diff --git a/include/asm-avr32/asm.h b/include/asm-avr32/asm.h
new file mode 100644
index 000000000000..515c7618952b
--- /dev/null
+++ b/include/asm-avr32/asm.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_ASM_H__
+#define __ASM_AVR32_ASM_H__
+
+#include <asm/sysreg.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+#define mask_interrupts ssrf SR_GM_BIT
+#define mask_exceptions ssrf SR_EM_BIT
+#define unmask_interrupts csrf SR_GM_BIT
+#define unmask_exceptions csrf SR_EM_BIT
+
+#ifdef CONFIG_FRAME_POINTER
+ .macro save_fp
+ st.w --sp, r7
+ .endm
+ .macro restore_fp
+ ld.w r7, sp++
+ .endm
+ .macro zero_fp
+ mov r7, 0
+ .endm
+#else
+ .macro save_fp
+ .endm
+ .macro restore_fp
+ .endm
+ .macro zero_fp
+ .endm
+#endif
+ .macro get_thread_info reg
+ mov \reg, sp
+ andl \reg, ~(THREAD_SIZE - 1) & 0xffff
+ .endm
+
+ /* Save and restore registers */
+ .macro save_min sr, tmp=lr
+ pushm lr
+ mfsr \tmp, \sr
+ zero_fp
+ st.w --sp, \tmp
+ .endm
+
+ .macro restore_min sr, tmp=lr
+ ld.w \tmp, sp++
+ mtsr \sr, \tmp
+ popm lr
+ .endm
+
+ .macro save_half sr, tmp=lr
+ save_fp
+ pushm r8-r9,r10,r11,r12,lr
+ zero_fp
+ mfsr \tmp, \sr
+ st.w --sp, \tmp
+ .endm
+
+ .macro restore_half sr, tmp=lr
+ ld.w \tmp, sp++
+ mtsr \sr, \tmp
+ popm r8-r9,r10,r11,r12,lr
+ restore_fp
+ .endm
+
+ .macro save_full_user sr, tmp=lr
+ stmts --sp, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr
+ st.w --sp, lr
+ zero_fp
+ mfsr \tmp, \sr
+ st.w --sp, \tmp
+ .endm
+
+ .macro restore_full_user sr, tmp=lr
+ ld.w \tmp, sp++
+ mtsr \sr, \tmp
+ ld.w lr, sp++
+ ldmts sp++, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr
+ .endm
+
+ /* uaccess macros */
+ .macro branch_if_kernel scratch, label
+ get_thread_info \scratch
+ ld.w \scratch, \scratch[TI_flags]
+ bld \scratch, TIF_USERSPACE
+ brcc \label
+ .endm
+
+ .macro ret_if_privileged scratch, addr, size, ret
+ sub \scratch, \size, 1
+ add \scratch, \addr
+ retcs \ret
+ retmi \ret
+ .endm
+
+#endif /* __ASM_AVR32_ASM_H__ */
diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h
new file mode 100644
index 000000000000..e0b9c44c126c
--- /dev/null
+++ b/include/asm-avr32/atomic.h
@@ -0,0 +1,201 @@
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc.
+ *
+ * But use these as seldom as possible since they are slower than
+ * regular operations.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_ATOMIC_H
+#define __ASM_AVR32_ATOMIC_H
+
+#include <asm/system.h>
+
+typedef struct { volatile int counter; } atomic_t;
+#define ATOMIC_INIT(i) { (i) }
+
+#define atomic_read(v) ((v)->counter)
+#define atomic_set(v, i) (((v)->counter) = (i)) /* (i): safe for expression arguments */
+
+/*
+ * atomic_sub_return - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v. Returns the resulting value.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+ int result;
+
+ asm volatile(
+ "/* atomic_sub_return */\n"
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " sub %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(result), "=o"(v->counter)
+ : "m"(v->counter), "ir"(i)
+ : "cc", "memory"); /* value-returning atomics must also be compiler barriers */
+
+ return result;
+}
+
+/*
+ * atomic_add_return - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v. Returns the resulting value.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ int result;
+
+ if (__builtin_constant_p(i))
+ result = atomic_sub_return(-i, v);
+ else
+ asm volatile(
+ "/* atomic_add_return */\n"
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " add %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(result), "=o"(v->counter) /* %1: stcond target */
+ : "m"(v->counter), "r"(i) /* %2: ld.w source */
+ : "cc", "memory");
+
+ return result;
+}
+
+/*
+ * atomic_sub_unless - sub unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to subtract from v...
+ * @u: ...unless v is equal to u.
+ *
+ * If the atomic value v is not equal to u, this function subtracts a
+ * from v, and returns non zero. If v is equal to u then it returns
+ * zero. This is done as an atomic operation.
+*/
+static inline int atomic_sub_unless(atomic_t *v, int a, int u)
+{
+ int tmp, result = 0;
+
+ asm volatile(
+ "/* atomic_sub_unless */\n"
+ "1: ssrf 5\n"
+ " ld.w %0, %3\n"
+ " cp.w %0, %5\n"
+ " breq 1f\n"
+ " sub %0, %4\n"
+ " stcond %2, %0\n"
+ " brne 1b\n"
+ " mov %1, 1\n"
+ "1:"
+ : "=&r"(tmp), "=&r"(result), "=o"(v->counter)
+ : "m"(v->counter), "ir"(a), "ir"(u), "1"(result) /* keep the 0 when breq skips the mov */
+ : "cc", "memory");
+
+ return result;
+}
+
+/*
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * If the atomic value v is not equal to u, this function adds a to v,
+ * and returns non zero. If v is equal to u then it returns zero. This
+ * is done as an atomic operation.
+*/
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int tmp, result;
+
+ if (__builtin_constant_p(a))
+ result = atomic_sub_unless(v, -a, u);
+ else {
+ result = 0;
+ asm volatile(
+ "/* atomic_add_unless */\n"
+ "1: ssrf 5\n"
+ " ld.w %0, %3\n"
+ " cp.w %0, %5\n"
+ " breq 1f\n"
+ " add %0, %4\n"
+ " stcond %2, %0\n"
+ " brne 1b\n"
+ " mov %1, 1\n"
+ "1:"
+ : "=&r"(tmp), "=&r"(result), "=o"(v->counter)
+ : "m"(v->counter), "r"(a), "ir"(u), "1"(result) /* keep the 0 when breq skips the mov */
+ : "cc", "memory");
+ }
+
+ return result;
+}
+
+/*
+ * atomic_sub_if_positive - conditionally subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically test @v and subtract @i if @v is greater than or equal to @i.
+ * The function returns the old value of @v minus @i, whether or not it was stored.
+ */
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+ int result;
+
+ asm volatile(
+ "/* atomic_sub_if_positive */\n"
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " sub %0, %3\n"
+ " brlt 1f\n"
+ " stcond %1, %0\n"
+ " brne 1b\n"
+ "1:"
+ : "=&r"(result), "=o"(v->counter)
+ : "m"(v->counter), "ir"(i)
+ : "cc", "memory");
+
+ return result;
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
+#define atomic_sub(i, v) (void)atomic_sub_return(i, v)
+#define atomic_add(i, v) (void)atomic_add_return(i, v)
+#define atomic_dec(v) atomic_sub(1, (v))
+#define atomic_inc(v) atomic_add(1, (v))
+
+#define atomic_dec_return(v) atomic_sub_return(1, v)
+#define atomic_inc_return(v) atomic_add_return(1, v)
+
+#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
+#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
+
+#define atomic_inc_not_zero(v) atomic_add_unless(v, 1, 0)
+#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v)
+
+#define smp_mb__before_atomic_dec() barrier()
+#define smp_mb__after_atomic_dec() barrier()
+#define smp_mb__before_atomic_inc() barrier()
+#define smp_mb__after_atomic_inc() barrier()
+
+#include <asm-generic/atomic.h>
+
+#endif /* __ASM_AVR32_ATOMIC_H */
diff --git a/include/asm-avr32/auxvec.h b/include/asm-avr32/auxvec.h
new file mode 100644
index 000000000000..d5dd435bf8f4
--- /dev/null
+++ b/include/asm-avr32/auxvec.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_AVR32_AUXVEC_H
+#define __ASM_AVR32_AUXVEC_H
+
+#endif /* __ASM_AVR32_AUXVEC_H */
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
new file mode 100644
index 000000000000..5299f8c8e11d
--- /dev/null
+++ b/include/asm-avr32/bitops.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_BITOPS_H
+#define __ASM_AVR32_BITOPS_H
+
+#include <asm/byteorder.h>
+#include <asm/system.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler
+ */
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+/*
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile void * addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+ unsigned long tmp;
+
+ if (__builtin_constant_p(nr)) {
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " sbr %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p)
+ : "m"(*p), "i"(nr % BITS_PER_LONG) /* bit index within *p */
+ : "cc");
+ } else {
+ unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " or %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p)
+ : "m"(*p), "r"(mask)
+ : "cc");
+ }
+}
+
+/*
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void clear_bit(int nr, volatile void * addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+ unsigned long tmp;
+
+ if (__builtin_constant_p(nr)) {
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " cbr %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p)
+ : "m"(*p), "i"(nr % BITS_PER_LONG) /* bit index within *p */
+ : "cc");
+ } else {
+ unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " andn %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p)
+ : "m"(*p), "r"(mask)
+ : "cc");
+ }
+}
+
+/*
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile void * addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+ unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+ unsigned long tmp;
+
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %2\n"
+ " eor %0, %3\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p)
+ : "m"(*p), "r"(mask)
+ : "cc");
+}
+
+/*
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(int nr, volatile void * addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+ unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+ unsigned long tmp, old;
+
+ if (__builtin_constant_p(nr)) {
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %3\n"
+ " mov %2, %0\n"
+ " sbr %0, %4\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p), "=&r"(old)
+ : "m"(*p), "i"(nr % BITS_PER_LONG) /* bit index within *p */
+ : "memory", "cc");
+ } else {
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %2, %3\n"
+ " or %0, %2, %4\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p), "=&r"(old)
+ : "m"(*p), "r"(mask)
+ : "memory", "cc");
+ }
+
+ return (old & mask) != 0;
+}
+
+/*
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+ unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+ unsigned long tmp, old;
+
+ if (__builtin_constant_p(nr)) {
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %3\n"
+ " mov %2, %0\n"
+ " cbr %0, %4\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p), "=&r"(old)
+ : "m"(*p), "i"(nr % BITS_PER_LONG) /* bit index within *p */
+ : "memory", "cc");
+ } else {
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %0, %3\n"
+ " mov %2, %0\n"
+ " andn %0, %4\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p), "=&r"(old)
+ : "m"(*p), "r"(mask)
+ : "memory", "cc");
+ }
+
+ return (old & mask) != 0;
+}
+
+/*
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile void * addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+ unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+ unsigned long tmp, old;
+
+ asm volatile(
+ "1: ssrf 5\n"
+ " ld.w %2, %3\n"
+ " eor %0, %2, %4\n"
+ " stcond %1, %0\n"
+ " brne 1b"
+ : "=&r"(tmp), "=o"(*p), "=&r"(old)
+ : "m"(*p), "r"(mask)
+ : "memory", "cc");
+
+ return (old & mask) != 0;
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+/* Find First bit Set */
+static inline unsigned long __ffs(unsigned long word)
+{
+ unsigned long result;
+
+ asm("brev %1\n\t"
+ "clz %0,%1"
+ : "=r"(result), "=&r"(word)
+ : "1"(word));
+ return result;
+}
+
+/* Find First Zero */
+static inline unsigned long ffz(unsigned long word)
+{
+ return __ffs(~word);
+}
+
+/* Find Last bit Set */
+static inline int fls(unsigned long word)
+{
+ unsigned long result;
+
+ asm("clz %0,%1" : "=r"(result) : "r"(word));
+ return 32 - result;
+}
+
+unsigned long find_first_zero_bit(const unsigned long *addr,
+ unsigned long size);
+unsigned long find_next_zero_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+unsigned long find_first_bit(const unsigned long *addr,
+ unsigned long size);
+unsigned long find_next_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+
+/*
+ * ffs: find first bit set. This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ *
+ * The difference is that bit numbering starts at 1, and if no bit is set,
+ * the function returns 0.
+ */
+static inline int ffs(unsigned long word)
+{
+ if (!word) /* no bit set: libc-style ffs() reports 0 */
+ return 0;
+ return 1 + __ffs(word); /* convert 0-based index to 1-based */
+}
+
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+#include <asm-generic/bitops/minix-le.h>
+
+#endif /* __ASM_AVR32_BITOPS_H */
diff --git a/include/asm-avr32/bug.h b/include/asm-avr32/bug.h
new file mode 100644
index 000000000000..521766bc9366
--- /dev/null
+++ b/include/asm-avr32/bug.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_BUG_H
+#define __ASM_AVR32_BUG_H
+
+#ifdef CONFIG_BUG
+
+/*
+ * According to our Chief Architect, this compact opcode is very
+ * unlikely to ever be implemented.
+ */
+#define AVR32_BUG_OPCODE 0x5df0
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define BUG() \
+ do { \
+ asm volatile(".hword %0\n\t" \
+ ".hword %1\n\t" \
+ ".long %2" \
+ : \
+ : "n"(AVR32_BUG_OPCODE), \
+ "i"(__LINE__), "X"(__FILE__)); \
+ } while (0)
+
+#else
+
+#define BUG() \
+ do { \
+ asm volatile(".hword %0\n\t" \
+ : : "n"(AVR32_BUG_OPCODE)); \
+ } while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define HAVE_ARCH_BUG
+
+#endif /* CONFIG_BUG */
+
+#include <asm-generic/bug.h>
+
+#endif /* __ASM_AVR32_BUG_H */
diff --git a/include/asm-avr32/bugs.h b/include/asm-avr32/bugs.h
new file mode 100644
index 000000000000..7635e770622e
--- /dev/null
+++ b/include/asm-avr32/bugs.h
@@ -0,0 +1,15 @@
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ * void check_bugs(void);
+ */
+#ifndef __ASM_AVR32_BUGS_H
+#define __ASM_AVR32_BUGS_H
+
+static void __init check_bugs(void)
+{
+ cpu_data->loops_per_jiffy = loops_per_jiffy;
+}
+
+#endif /* __ASM_AVR32_BUGS_H */
diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h
new file mode 100644
index 000000000000..402ff4125cdc
--- /dev/null
+++ b/include/asm-avr32/byteorder.h
@@ -0,0 +1,25 @@
+/*
+ * AVR32 endian-conversion functions.
+ */
+#ifndef __ASM_AVR32_BYTEORDER_H
+#define __ASM_AVR32_BYTEORDER_H
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+extern unsigned long __builtin_bswap_32(unsigned long x);
+extern unsigned short __builtin_bswap_16(unsigned short x);
+#endif
+
+#define __arch__swab32(x) __builtin_bswap_32(x)
+#define __arch__swab16(x) __builtin_bswap_16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+# define __BYTEORDER_HAS_U64__
+# define __SWAB_64_THRU_32__
+#endif
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* __ASM_AVR32_BYTEORDER_H */
diff --git a/include/asm-avr32/cache.h b/include/asm-avr32/cache.h
new file mode 100644
index 000000000000..dabb955f3c00
--- /dev/null
+++ b/include/asm-avr32/cache.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_AVR32_CACHE_H
+#define __ASM_AVR32_CACHE_H
+
+#define L1_CACHE_SHIFT 5
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#ifndef __ASSEMBLER__
+struct cache_info {
+ unsigned int ways;
+ unsigned int sets;
+ unsigned int linesz;
+};
+#endif /* !__ASSEMBLER__ */
+
+/* Cache operation constants */
+#define ICACHE_FLUSH 0x00
+#define ICACHE_INVALIDATE 0x01
+#define ICACHE_LOCK 0x02
+#define ICACHE_UNLOCK 0x03
+#define ICACHE_PREFETCH 0x04
+
+#define DCACHE_FLUSH 0x08
+#define DCACHE_LOCK 0x09
+#define DCACHE_UNLOCK 0x0a
+#define DCACHE_INVALIDATE 0x0b
+#define DCACHE_CLEAN 0x0c
+#define DCACHE_CLEAN_INVAL 0x0d
+
+#endif /* __ASM_AVR32_CACHE_H */
diff --git a/include/asm-avr32/cachectl.h b/include/asm-avr32/cachectl.h
new file mode 100644
index 000000000000..4faf1ce60061
--- /dev/null
+++ b/include/asm-avr32/cachectl.h
@@ -0,0 +1,11 @@
+#ifndef __ASM_AVR32_CACHECTL_H
+#define __ASM_AVR32_CACHECTL_H
+
+/*
+ * Operations that can be performed through the cacheflush system call
+ */
+
+/* Clean the data cache, then invalidate the icache */
+#define CACHE_IFLUSH 0
+
+#endif /* __ASM_AVR32_CACHECTL_H */
diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h
new file mode 100644
index 000000000000..f1bf1708980e
--- /dev/null
+++ b/include/asm-avr32/cacheflush.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_CACHEFLUSH_H
+#define __ASM_AVR32_CACHEFLUSH_H
+
+/* Keep includes the same across arches. */
+#include <linux/mm.h>
+
+#define CACHE_OP_ICACHE_INVALIDATE 0x01
+#define CACHE_OP_DCACHE_INVALIDATE 0x0b
+#define CACHE_OP_DCACHE_CLEAN 0x0c
+#define CACHE_OP_DCACHE_CLEAN_INVAL 0x0d
+
+/*
+ * Invalidate any cacheline containing virtual address vaddr without
+ * writing anything back to memory.
+ *
+ * Note that this function may corrupt unrelated data structures when
+ * applied to buffers that are not cacheline-aligned at both ends.
+ */
+static inline void invalidate_dcache_line(void *vaddr)
+{
+ asm volatile("cache %0[0], %1"
+ :
+ : "r"(vaddr), "n"(CACHE_OP_DCACHE_INVALIDATE)
+ : "memory");
+}
+
+/*
+ * Make sure any cacheline containing virtual address vaddr is written
+ * to memory.
+ */
+static inline void clean_dcache_line(void *vaddr)
+{
+ asm volatile("cache %0[0], %1"
+ :
+ : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN)
+ : "memory");
+}
+
+/*
+ * Make sure any cacheline containing virtual address vaddr is written
+ * to memory and then invalidate it.
+ */
+static inline void flush_dcache_line(void *vaddr)
+{
+ asm volatile("cache %0[0], %1"
+ :
+ : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN_INVAL)
+ : "memory");
+}
+
+/*
+ * Invalidate any instruction cacheline containing virtual address
+ * vaddr.
+ */
+static inline void invalidate_icache_line(void *vaddr)
+{
+ asm volatile("cache %0[0], %1"
+ :
+ : "r"(vaddr), "n"(CACHE_OP_ICACHE_INVALIDATE)
+ : "memory");
+}
+
+/*
+ * Applies the above functions on all lines that are touched by the
+ * specified virtual address range.
+ */
+void invalidate_dcache_region(void *start, size_t len);
+void clean_dcache_region(void *start, size_t len);
+void flush_dcache_region(void *start, size_t len);
+void invalidate_icache_region(void *start, size_t len);
+
+/*
+ * Make sure any pending writes are completed before continuing.
+ */
+#define flush_write_buffer() asm volatile("sync 0" : : : "memory")
+
+/*
+ * The following functions are called when a virtual mapping changes.
+ * We do not need to flush anything in this case.
+ */
+#define flush_cache_all() do { } while (0)
+#define flush_cache_mm(mm) do { } while (0)
+#define flush_cache_range(vma, start, end) do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
+#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vunmap(start, end) do { } while (0)
+
+/*
+ * I think we need to implement this one to be able to reliably
+ * execute pages from RAMDISK. However, if we implement the
+ * flush_dcache_*() functions, it might not be needed anymore.
+ *
+ * #define flush_icache_page(vma, page) do { } while (0)
+ */
+extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
+
+/*
+ * These are (I think) related to D-cache aliasing. We might need to
+ * do something here, but only for certain configurations. No such
+ * configurations exist at this time.
+ */
+#define flush_dcache_page(page) do { } while (0)
+#define flush_dcache_mmap_lock(page) do { } while (0)
+#define flush_dcache_mmap_unlock(page) do { } while (0)
+
+/*
+ * These are for I/D cache coherency. In this case, we do need to
+ * flush with all configurations.
+ */
+extern void flush_icache_range(unsigned long start, unsigned long end);
+extern void flush_icache_user_range(struct vm_area_struct *vma,
+ struct page *page,
+ unsigned long addr, int len);
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) do { \
+ memcpy(dst, src, len); \
+ flush_icache_user_range(vma, page, vaddr, len); \
+} while(0)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+ memcpy(dst, src, len)
+
+#endif /* __ASM_AVR32_CACHEFLUSH_H */
diff --git a/include/asm-avr32/checksum.h b/include/asm-avr32/checksum.h
new file mode 100644
index 000000000000..41b7af09edc4
--- /dev/null
+++ b/include/asm-avr32/checksum.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_CHECKSUM_H
+#define __ASM_AVR32_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+unsigned int csum_partial(const unsigned char * buff, int len,
+ unsigned int sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
+ int sum, int *src_err_ptr,
+ int *dst_err_ptr);
+
+/*
+ * Note: when you get a NULL pointer exception here this means someone
+ * passed in an incorrect kernel address to one of these functions.
+ *
+ * If you use these functions directly please don't forget the
+ * access_ok() check.
+ */
+static inline
+unsigned int csum_partial_copy_nocheck(const char *src, char *dst,
+ int len, int sum)
+{
+ return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+}
+
+static inline
+unsigned int csum_partial_copy_from_user (const char __user *src, char *dst,
+ int len, int sum, int *err_ptr)
+{
+ return csum_partial_copy_generic((const char __force *)src, dst, len,
+ sum, err_ptr, NULL);
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+static inline unsigned short ip_fast_csum(unsigned char *iph,
+ unsigned int ihl)
+{
+ unsigned int sum, tmp;
+
+ __asm__ __volatile__(
+ " ld.w %0, %1++\n"
+ " ld.w %3, %1++\n"
+ " sub %2, 4\n"
+ " add %0, %3\n"
+ " ld.w %3, %1++\n"
+ " adc %0, %0, %3\n"
+ " ld.w %3, %1++\n"
+ " adc %0, %0, %3\n"
+ " acr %0\n"
+ "1: ld.w %3, %1++\n"
+ " add %0, %3\n"
+ " acr %0\n"
+ " sub %2, 1\n"
+ " brne 1b\n"
+ " lsl %3, %0, 16\n"
+ " andl %0, 0\n"
+ " mov %2, 0xffff\n"
+ " add %0, %3\n"
+ " adc %0, %0, %2\n"
+ " com %0\n"
+ " lsr %0, 16\n"
+ : "=r"(sum), "=r"(iph), "=r"(ihl), "=r"(tmp)
+ : "1"(iph), "2"(ihl)
+ : "memory", "cc");
+ return sum;
+}
+
+/*
+ * Fold a partial checksum
+ */
+
+static inline unsigned int csum_fold(unsigned int sum)
+{
+ unsigned int tmp;
+
+ asm(" bfextu %1, %0, 0, 16\n"
+ " lsr %0, 16\n"
+ " add %0, %1\n"
+ " bfextu %1, %0, 16, 16\n"
+ " add %0, %1"
+ : "=&r"(sum), "=&r"(tmp)
+ : "0"(sum));
+
+ return ~sum;
+}
+
+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
+ unsigned long daddr,
+ unsigned short len,
+ unsigned short proto,
+ unsigned int sum)
+{
+ asm(" add %0, %1\n"
+ " adc %0, %0, %2\n"
+ " adc %0, %0, %3\n"
+ " acr %0"
+ : "=&r"(sum) /* earlyclobber: %0 is written before %2 and %3 are read */
+ : "r"(daddr), "r"(saddr), "r"(ntohs(len) | (proto << 16)),
+ "0"(sum)
+ : "cc");
+
+ return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
+ unsigned long daddr,
+ unsigned short len,
+ unsigned short proto,
+ unsigned int sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+
+static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
+{
+ return csum_fold(csum_partial(buff, len, 0));
+}
+
+#endif /* __ASM_AVR32_CHECKSUM_H */
diff --git a/include/asm-avr32/cputime.h b/include/asm-avr32/cputime.h
new file mode 100644
index 000000000000..e87e0f81cbeb
--- /dev/null
+++ b/include/asm-avr32/cputime.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_CPUTIME_H
+#define __ASM_AVR32_CPUTIME_H
+
+#include <asm-generic/cputime.h>
+
+#endif /* __ASM_AVR32_CPUTIME_H */
diff --git a/include/asm-avr32/current.h b/include/asm-avr32/current.h
new file mode 100644
index 000000000000..c7b0549eab8a
--- /dev/null
+++ b/include/asm-avr32/current.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_AVR32_CURRENT_H
+#define __ASM_AVR32_CURRENT_H
+
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+/* Return the task pointer stored in the current thread_info. */
+inline static struct task_struct * get_current(void)
+{
+ return current_thread_info()->task;
+}
+
+#define current get_current()
+
+#endif /* __ASM_AVR32_CURRENT_H */
diff --git a/include/asm-avr32/delay.h b/include/asm-avr32/delay.h
new file mode 100644
index 000000000000..cc3b2e3343b3
--- /dev/null
+++ b/include/asm-avr32/delay.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_AVR32_DELAY_H
+#define __ASM_AVR32_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/avr32/lib/delay.c
+ */
+
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __const_udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+/*
+ * For compile-time constant arguments the range is checked right here:
+ * values above 20000 become a call to __bad_udelay()/__bad_ndelay(),
+ * anything else is pre-scaled and handed to __const_udelay().
+ * Non-constant arguments go to __udelay()/__ndelay().
+ *
+ * NOTE(review): 0x10c6 and 5 look like 2^32 / 10^6 and 2^32 / 10^9
+ * scale factors -- confirm against arch/avr32/lib/delay.c.
+ */
+#define udelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
+ __udelay(n))
+
+#define ndelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+ __ndelay(n))
+
+#endif /* __ASM_AVR32_DELAY_H */
diff --git a/include/asm-avr32/div64.h b/include/asm-avr32/div64.h
new file mode 100644
index 000000000000..d7ddd4fdeca6
--- /dev/null
+++ b/include/asm-avr32/div64.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_DIV64_H
+#define __ASM_AVR32_DIV64_H
+
+#include <asm-generic/div64.h>
+
+#endif /* __ASM_AVR32_DIV64_H */
diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h
new file mode 100644
index 000000000000..4c40cb41cdf8
--- /dev/null
+++ b/include/asm-avr32/dma-mapping.h
@@ -0,0 +1,320 @@
+#ifndef __ASM_AVR32_DMA_MAPPING_H
+#define __ASM_AVR32_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <asm/scatterlist.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+extern void dma_cache_sync(void *vaddr, size_t size, int direction);
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly. For example, if your device can only drive the low 24-bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask
+ * to this function.
+ */
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+ /* Fix when needed. I really don't know of any limitations */
+ return 1;
+}
+
+/*
+ * Set the DMA mask of dev. Returns -EIO if the device has no dma_mask
+ * storage or if dma_supported() rejects the mask; otherwise stores
+ * dma_mask through dev->dma_mask and returns 0.
+ */
+static inline int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+
+ *dev->dma_mask = dma_mask;
+ return 0;
+}
+
+/**
+ * dma_alloc_coherent - allocate consistent memory for DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: required memory size
+ * @handle: bus-specific DMA address
+ *
+ * Allocate some uncached, unbuffered memory for a device for
+ * performing DMA. This function allocates pages, and will
+ * return the CPU-viewed address, and sets @handle to be the
+ * device-viewed address.
+ */
+extern void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp);
+
+/**
+ * dma_free_coherent - free memory allocated by dma_alloc_coherent
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: size of memory originally requested in dma_alloc_coherent
+ * @cpu_addr: CPU-view address returned from dma_alloc_coherent
+ * @handle: device-view address returned from dma_alloc_coherent
+ *
+ * Free (and unmap) a DMA buffer previously allocated by
+ * dma_alloc_coherent().
+ *
+ * References to memory and mappings associated with cpu_addr/handle
+ * during and after this call executing are illegal.
+ */
+extern void dma_free_coherent(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t handle);
+
+/**
+ * dma_alloc_writecombine - allocate write-combining memory for DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: required memory size
+ * @handle: bus-specific DMA address
+ *
+ * Allocate some uncached, buffered memory for a device for
+ * performing DMA. This function allocates pages, and will
+ * return the CPU-viewed address, and sets @handle to be the
+ * device-viewed address.
+ */
+extern void *dma_alloc_writecombine(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp);
+
+/**
+ * dma_free_writecombine - free memory allocated by dma_alloc_writecombine
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: size of memory originally requested in dma_alloc_writecombine
+ * @cpu_addr: CPU-view address returned from dma_alloc_writecombine
+ * @handle: device-view address returned from dma_alloc_writecombine
+ *
+ * Free (and unmap) a DMA buffer previously allocated by
+ * dma_alloc_writecombine().
+ *
+ * References to memory and mappings associated with cpu_addr/handle
+ * during and after this call executing are illegal.
+ */
+extern void dma_free_writecombine(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t handle);
+
+/**
+ * dma_map_single - map a single buffer for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @cpu_addr: CPU direct mapped address of buffer
+ * @size: size of buffer to map
+ * @direction: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed. The CPU
+ * can regain ownership by calling dma_unmap_single() or dma_sync_single().
+ */
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_cache_sync(cpu_addr, size, direction);
+ return virt_to_bus(cpu_addr);
+}
+
+/**
+ * dma_unmap_single - unmap a single buffer previously mapped
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @dma_addr: DMA address of buffer
+ * @size: size of buffer to map
+ * @direction: DMA transfer direction
+ *
+ * Unmap a single streaming mode DMA translation. The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* Intentionally empty: no work is done at unmap time. */
+}
+
+/**
+ * dma_map_page - map a portion of a page for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @direction: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed. The CPU
+ * can regain ownership by calling dma_unmap_page() or dma_sync_single().
+ */
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ return dma_map_single(dev, page_address(page) + offset,
+ size, direction);
+}
+
+/**
+ * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @dma_address: DMA address of buffer
+ * @size: size of buffer to map
+ * @direction: DMA transfer direction
+ *
+ * Unmap a single streaming mode DMA translation. The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_unmap_single(dev, dma_address, size, direction);
+}
+
+/**
+ * dma_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @direction: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i;
+
+ for (i = 0; i < nents; i++) {
+ char *virt;
+
+ sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+ virt = page_address(sg[i].page) + sg[i].offset;
+ dma_cache_sync(virt, sg[i].length, direction);
+ }
+
+ return nents;
+}
+
+/**
+ * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nhwentries: number of buffers to map
+ * @direction: DMA transfer direction
+ *
+ * Unmap a set of streaming mode DMA translations.
+ * Again, CPU read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+ enum dma_data_direction direction)
+{
+ /* Intentionally empty: no work is done at unmap time. */
+}
+
+/**
+ * dma_sync_single_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @dma_handle: DMA address of buffer
+ * @size: size of buffer to map
+ * @direction: DMA transfer direction
+ *
+ * Make physical memory consistent for a single streaming mode DMA
+ * translation after a transfer.
+ *
+ * If you perform a dma_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the DMA mapping,
+ * you must call this function before doing so. At the next point you
+ * give the DMA address back to the card, you must first perform a
+ * dma_sync_single_for_device, and then the device again owns the
+ * buffer.
+ */
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_cache_sync(bus_to_virt(dma_handle), size, direction);
+}
+
+/*
+ * Counterpart of dma_sync_single_for_cpu(). The implementation is the
+ * same: dma_cache_sync() on the virtual address behind dma_handle.
+ */
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_cache_sync(bus_to_virt(dma_handle), size, direction);
+}
+
+/**
+ * dma_sync_sg_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @direction: DMA transfer direction
+ *
+ * Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as dma_sync_single_for_* but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ int i;
+
+ for (i = 0; i < nents; i++) {
+ dma_cache_sync(page_address(sg[i].page) + sg[i].offset,
+ sg[i].length, direction);
+ }
+}
+
+/*
+ * Counterpart of dma_sync_sg_for_cpu(). The implementation is the
+ * same: dma_cache_sync() on each of the first nents entries of sg.
+ */
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ int i;
+
+ for (i = 0; i < nents; i++) {
+ dma_cache_sync(page_address(sg[i].page) + sg[i].offset,
+ sg[i].length, direction);
+ }
+}
+
+/* Now for the API extensions over the pci_ one */
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+/* Unconditionally reports every DMA address as consistent. */
+static inline int dma_is_consistent(dma_addr_t dma_addr)
+{
+ return 1;
+}
+
+/* Returns the data cache line size recorded in boot_cpu_data. */
+static inline int dma_get_cache_alignment(void)
+{
+ return boot_cpu_data.dcache.linesz;
+}
+
+#endif /* __ASM_AVR32_DMA_MAPPING_H */
diff --git a/include/asm-avr32/dma.h b/include/asm-avr32/dma.h
new file mode 100644
index 000000000000..9e91205590ac
--- /dev/null
+++ b/include/asm-avr32/dma.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_AVR32_DMA_H
+#define __ASM_AVR32_DMA_H
+
+/* The maximum address that we can perform a DMA transfer to on this platform.
+ * Not really applicable to AVR32, but some functions need it. */
+#define MAX_DMA_ADDRESS 0xffffffff
+
+#endif /* __ASM_AVR32_DMA_H */
diff --git a/include/asm-avr32/elf.h b/include/asm-avr32/elf.h
new file mode 100644
index 000000000000..d334b4994d2d
--- /dev/null
+++ b/include/asm-avr32/elf.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_AVR32_ELF_H
+#define __ASM_AVR32_ELF_H
+
+/* AVR32 relocation numbers */
+#define R_AVR32_NONE 0
+#define R_AVR32_32 1
+#define R_AVR32_16 2
+#define R_AVR32_8 3
+#define R_AVR32_32_PCREL 4
+#define R_AVR32_16_PCREL 5
+#define R_AVR32_8_PCREL 6
+#define R_AVR32_DIFF32 7
+#define R_AVR32_DIFF16 8
+#define R_AVR32_DIFF8 9
+#define R_AVR32_GOT32 10
+#define R_AVR32_GOT16 11
+#define R_AVR32_GOT8 12
+#define R_AVR32_21S 13
+#define R_AVR32_16U 14
+#define R_AVR32_16S 15
+#define R_AVR32_8S 16
+#define R_AVR32_8S_EXT 17
+#define R_AVR32_22H_PCREL 18
+#define R_AVR32_18W_PCREL 19
+#define R_AVR32_16B_PCREL 20
+#define R_AVR32_16N_PCREL 21
+#define R_AVR32_14UW_PCREL 22
+#define R_AVR32_11H_PCREL 23
+#define R_AVR32_10UW_PCREL 24
+#define R_AVR32_9H_PCREL 25
+#define R_AVR32_9UW_PCREL 26
+#define R_AVR32_HI16 27
+#define R_AVR32_LO16 28
+#define R_AVR32_GOTPC 29
+#define R_AVR32_GOTCALL 30
+#define R_AVR32_LDA_GOT 31
+#define R_AVR32_GOT21S 32
+#define R_AVR32_GOT18SW 33
+#define R_AVR32_GOT16S 34
+#define R_AVR32_GOT7UW 35
+#define R_AVR32_32_CPENT 36
+#define R_AVR32_CPCALL 37
+#define R_AVR32_16_CP 38
+#define R_AVR32_9W_CP 39
+#define R_AVR32_RELATIVE 40
+#define R_AVR32_GLOB_DAT 41
+#define R_AVR32_JMP_SLOT 42
+#define R_AVR32_ALIGN 43
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct pt_regs) / sizeof (elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_fpu_struct elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ( (x)->e_machine == EM_AVR32 )
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS ELFCLASS32
+#ifdef __LITTLE_ENDIAN__
+#define ELF_DATA ELFDATA2LSB
+#else
+#define ELF_DATA ELFDATA2MSB
+#endif
+#define ELF_ARCH EM_AVR32
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE 4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ use of this is to invoke "./ld.so someprog" to test out a new version of
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. */
+
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
+
+
+/* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. This could be done in user space,
+ but it's not easy, and we've already done it here. */
+
+#define ELF_HWCAP (0)
+
+/* This yields a string that ld.so will use to load implementation
+ specific libraries for optimization. This is more specific in
+ intent than poking at uname or /proc/cpuinfo.
+
+ For the moment, we have only optimizations for the Intel generations,
+ but that could change... */
+
+#define ELF_PLATFORM (NULL)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT)
+#endif
+
+#endif /* __ASM_AVR32_ELF_H */
diff --git a/include/asm-avr32/emergency-restart.h b/include/asm-avr32/emergency-restart.h
new file mode 100644
index 000000000000..3e7e014776ba
--- /dev/null
+++ b/include/asm-avr32/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_EMERGENCY_RESTART_H
+#define __ASM_AVR32_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */
diff --git a/include/asm-avr32/errno.h b/include/asm-avr32/errno.h
new file mode 100644
index 000000000000..558a7249f06d
--- /dev/null
+++ b/include/asm-avr32/errno.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_ERRNO_H
+#define __ASM_AVR32_ERRNO_H
+
+#include <asm-generic/errno.h>
+
+#endif /* __ASM_AVR32_ERRNO_H */
diff --git a/include/asm-avr32/fcntl.h b/include/asm-avr32/fcntl.h
new file mode 100644
index 000000000000..14c0c4402b11
--- /dev/null
+++ b/include/asm-avr32/fcntl.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_FCNTL_H
+#define __ASM_AVR32_FCNTL_H
+
+#include <asm-generic/fcntl.h>
+
+#endif /* __ASM_AVR32_FCNTL_H */
diff --git a/include/asm-avr32/futex.h b/include/asm-avr32/futex.h
new file mode 100644
index 000000000000..10419f14a68a
--- /dev/null
+++ b/include/asm-avr32/futex.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_FUTEX_H
+#define __ASM_AVR32_FUTEX_H
+
+#include <asm-generic/futex.h>
+
+#endif /* __ASM_AVR32_FUTEX_H */
diff --git a/include/asm-avr32/hardirq.h b/include/asm-avr32/hardirq.h
new file mode 100644
index 000000000000..267354356f60
--- /dev/null
+++ b/include/asm-avr32/hardirq.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_AVR32_HARDIRQ_H
+#define __ASM_AVR32_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <asm/irq.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+
+/* entry.S is sensitive to the offsets of these fields */
+typedef struct {
+ unsigned int __softirq_pending;
+} ____cacheline_aligned irq_cpustat_t;
+
+void ack_bad_irq(unsigned int irq);
+
+/* Standard mappings for irq_cpustat_t above */
+#include <linux/irq_cpustat.h>
+
+#endif /* __ASSEMBLY__ */
+
+#define HARDIRQ_BITS 12
+
+/*
+ * The hardirq mask has to be large enough to have
+ * space for potentially all IRQ sources in the system
+ * nesting on a single CPU:
+ */
+#if (1 << HARDIRQ_BITS) < NR_IRQS
+# error HARDIRQ_BITS is too low!
+#endif
+
+#endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/include/asm-avr32/hw_irq.h b/include/asm-avr32/hw_irq.h
new file mode 100644
index 000000000000..218b0a6bfd1b
--- /dev/null
+++ b/include/asm-avr32/hw_irq.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_AVR32_HW_IRQ_H
+#define __ASM_AVR32_HW_IRQ_H
+
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
+{
+ /* Nothing to do */
+}
+
+#endif /* __ASM_AVR32_HW_IRQ_H */
diff --git a/include/asm-avr32/intc.h b/include/asm-avr32/intc.h
new file mode 100644
index 000000000000..1ac9ca75e8fd
--- /dev/null
+++ b/include/asm-avr32/intc.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_AVR32_INTC_H
+#define __ASM_AVR32_INTC_H
+
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+
+struct irq_controller;
+struct irqaction;
+struct pt_regs;
+
+struct platform_device;
+
+/* Information about the internal interrupt controller */
+struct intc_device {
+ /* ioremapped address of configuration block */
+ void __iomem *regs;
+
+ /* the physical device */
+ struct platform_device *pdev;
+
+ /* Number of interrupt lines per group. */
+ unsigned int irqs_per_group;
+
+ /* The highest group ID + 1 */
+ unsigned int nr_groups;
+
+ /*
+ * Bitfield indicating which groups are actually in use. The
+ * size of the array is
+ * ceil(group_max / (8 * sizeof(unsigned int))).
+ */
+ unsigned int group_mask[];
+};
+
+struct irq_controller_class {
+ /*
+ * A short name identifying this kind of controller.
+ */
+ const char *typename;
+ /*
+ * Handle the IRQ. Must do any necessary acking and masking.
+ */
+ irqreturn_t (*handle)(int irq, void *dev_id, struct pt_regs *regs);
+ /*
+ * Register a new IRQ handler.
+ */
+ int (*setup)(struct irq_controller *ctrl, unsigned int irq,
+ struct irqaction *action);
+ /*
+ * Unregister an IRQ handler.
+ */
+ void (*free)(struct irq_controller *ctrl, unsigned int irq,
+ void *dev_id);
+ /*
+ * Mask the IRQ in the interrupt controller.
+ */
+ void (*mask)(struct irq_controller *ctrl, unsigned int irq);
+ /*
+ * Unmask the IRQ in the interrupt controller.
+ */
+ void (*unmask)(struct irq_controller *ctrl, unsigned int irq);
+ /*
+ * Set the type of the IRQ. See below for possible types.
+ * Return -EINVAL if a given type is not supported
+ */
+ int (*set_type)(struct irq_controller *ctrl, unsigned int irq,
+ unsigned int type);
+ /*
+ * Return the IRQ type currently set
+ */
+ unsigned int (*get_type)(struct irq_controller *ctrl, unsigned int irq);
+};
+
+struct irq_controller {
+ struct irq_controller_class *class;
+ unsigned int irq_group;
+ unsigned int first_irq;
+ unsigned int nr_irqs;
+ struct list_head list;
+};
+
+struct intc_group_desc {
+ struct irq_controller *ctrl;
+ irqreturn_t (*handle)(int, void *, struct pt_regs *);
+ unsigned long flags;
+ void *dev_id;
+ const char *devname;
+};
+
+/*
+ * The internal interrupt controller. Defined in board/part-specific
+ * devices.c.
+ * TODO: Should probably be defined per-cpu.
+ */
+extern struct intc_device intc;
+
+extern int request_internal_irq(unsigned int irq,
+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char *devname, void *dev_id);
+extern void free_internal_irq(unsigned int irq);
+
+/* Only used by time_init() */
+extern int setup_internal_irq(unsigned int irq, struct intc_group_desc *desc);
+
+/*
+ * Set interrupt priority for a given group. `group' can be found by
+ * using irq_to_group(irq). Priority can be from 0 (lowest) to 3
+ * (highest). Higher-priority interrupts will preempt lower-priority
+ * interrupts (unless interrupts are masked globally).
+ *
+ * This function does not check for conflicts within a group.
+ */
+extern int intc_set_priority(unsigned int group,
+ unsigned int priority);
+
+/*
+ * Returns a bitmask of pending interrupts in a group.
+ */
+extern unsigned long intc_get_pending(unsigned int group);
+
+/*
+ * Register a new external interrupt controller. Returns the first
+ * external IRQ number that is assigned to the new controller.
+ */
+extern int intc_register_controller(struct irq_controller *ctrl);
+
+#endif /* __ASM_AVR32_INTC_H */
diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h
new file mode 100644
index 000000000000..2fc8f111dce9
--- /dev/null
+++ b/include/asm-avr32/io.h
@@ -0,0 +1,253 @@
+#ifndef __ASM_AVR32_IO_H
+#define __ASM_AVR32_IO_H
+
+#include <linux/string.h>
+
+#ifdef __KERNEL__
+
+#include <asm/addrspace.h>
+#include <asm/byteorder.h>
+
+/* virt_to_phys will only work when address is in P1 or P2 */
+static __inline__ unsigned long virt_to_phys(volatile void *address)
+{
+ return PHYSADDR(address);
+}
+
+/*
+ * Map a physical address to its P1 segment virtual address (the same
+ * mapping phys_to_cached() uses).
+ */
+static __inline__ void * phys_to_virt(unsigned long address)
+{
+ return (void *)P1SEGADDR(address);
+}
+
+#define cached_to_phys(addr) ((unsigned long)PHYSADDR(addr))
+#define uncached_to_phys(addr) ((unsigned long)PHYSADDR(addr))
+#define phys_to_cached(addr) ((void *)P1SEGADDR(addr))
+#define phys_to_uncached(addr) ((void *)P2SEGADDR(addr))
+
+/*
+ * Generic IO read/write. These perform native-endian accesses. Note
+ * that some architectures will want to re-define __raw_{read,write}w.
+ */
+extern void __raw_writesb(unsigned int addr, const void *data, int bytelen);
+extern void __raw_writesw(unsigned int addr, const void *data, int wordlen);
+extern void __raw_writesl(unsigned int addr, const void *data, int longlen);
+
+extern void __raw_readsb(unsigned int addr, void *data, int bytelen);
+extern void __raw_readsw(unsigned int addr, void *data, int wordlen);
+extern void __raw_readsl(unsigned int addr, void *data, int longlen);
+
+/*
+ * MMIO store helpers. Each one performs a single volatile store of the
+ * matching width to addr; no byte swapping is done here. The __raw_*
+ * names below are plain aliases for the same functions.
+ */
+static inline void writeb(unsigned char b, volatile void __iomem *addr)
+{
+ *(volatile unsigned char __force *)addr = b;
+}
+static inline void writew(unsigned short b, volatile void __iomem *addr)
+{
+ *(volatile unsigned short __force *)addr = b;
+}
+static inline void writel(unsigned int b, volatile void __iomem *addr)
+{
+ *(volatile unsigned int __force *)addr = b;
+}
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+/*
+ * MMIO load helpers. Each one performs a single volatile load of the
+ * matching width from addr and returns the value unmodified. The
+ * __raw_* names below are plain aliases for the same functions.
+ */
+static inline unsigned char readb(const volatile void __iomem *addr)
+{
+ return *(const volatile unsigned char __force *)addr;
+}
+static inline unsigned short readw(const volatile void __iomem *addr)
+{
+ return *(const volatile unsigned short __force *)addr;
+}
+static inline unsigned int readl(const volatile void __iomem *addr)
+{
+ return *(const volatile unsigned int __force *)addr;
+}
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+/*
+ * String I/O helpers: forward to the out-of-line __raw_{write,read}s*
+ * routines, which take the target address as an unsigned int.
+ * The address argument is parenthesized so that an expression such as
+ * "base + off" is cast as a whole rather than only its first operand.
+ */
+#define writesb(p, d, l) __raw_writesb((unsigned int)(p), d, l)
+#define writesw(p, d, l) __raw_writesw((unsigned int)(p), d, l)
+#define writesl(p, d, l) __raw_writesl((unsigned int)(p), d, l)
+
+#define readsb(p, d, l) __raw_readsb((unsigned int)(p), d, l)
+#define readsw(p, d, l) __raw_readsw((unsigned int)(p), d, l)
+#define readsl(p, d, l) __raw_readsl((unsigned int)(p), d, l)
+
+/*
+ * These two are only here because ALSA _thinks_ it needs them...
+ *
+ * memcpy_fromio() copies count bytes from I/O memory at from into the
+ * buffer at to, one readb() per byte.
+ */
+static inline void memcpy_fromio(void * to, const volatile void __iomem *from,
+ unsigned long count)
+{
+ char *p = to;
+ while (count) {
+ count--;
+ *p = readb(from);
+ p++;
+ from++;
+ }
+}
+
+/*
+ * Copy count bytes from the buffer at from into I/O memory at to,
+ * one writeb() per byte.
+ */
+static inline void memcpy_toio(volatile void __iomem *to, const void * from,
+ unsigned long count)
+{
+ const char *p = from;
+ while (count) {
+ count--;
+ writeb(*p, to);
+ p++;
+ to++;
+ }
+}
+
+/*
+ * Fill count bytes of I/O memory at addr with val. This is a plain
+ * memset() on the pointer with its __iomem annotation cast away.
+ */
+static inline void memset_io(volatile void __iomem *addr, unsigned char val,
+ unsigned long count)
+{
+ memset((void __force *)addr, val, count);
+}
+
+/*
+ * Bad read/write accesses...
+ */
+extern void __readwrite_bug(const char *fn);
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+/* Convert I/O port address to virtual address */
+#define __io(p) ((void __iomem *)phys_to_uncached(p))
+
+/*
+ * IO port access primitives
+ * -------------------------
+ *
+ * The AVR32 doesn't have special IO access instructions; all IO is memory
+ * mapped. Note that these are defined to perform little endian accesses
+ * only. Their primary purpose is to access PCI and ISA peripherals.
+ *
+ * Note that for a big endian machine, this implies that the following
+ * big endian mode connectivity is in place.
+ *
+ * The machine specific io.h include defines __io to translate an "IO"
+ * address to a memory address.
+ *
+ * Note that we prevent GCC re-ordering or caching values in expressions
+ * by introducing sequence points into the in*() definitions. Note that
+ * __raw_* do not guarantee this behaviour.
+ *
+ * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space.
+ */
+#define outb(v, p) __raw_writeb(v, __io(p))
+#define outw(v, p) __raw_writew(cpu_to_le16(v), __io(p))
+#define outl(v, p) __raw_writel(cpu_to_le32(v), __io(p))
+
+#define inb(p) __raw_readb(__io(p))
+#define inw(p) le16_to_cpu(__raw_readw(__io(p)))
+#define inl(p) le32_to_cpu(__raw_readl(__io(p)))
+
+/*
+ * Write count bytes from the buffer at addr to I/O port "port", one
+ * outb() per byte. The buffer is only read, so it is taken as a
+ * pointer to const; callers with non-const buffers are unaffected.
+ */
+static inline void __outsb(unsigned long port, const void *addr,
+ unsigned int count)
+{
+ while (count--) {
+ outb(*(const u8 *)addr, port);
+ addr++;
+ }
+}
+
+/*
+ * Read count bytes from I/O port "port" into the buffer at addr, one
+ * inb() per byte.
+ */
+static inline void __insb(unsigned long port, void *addr, unsigned int count)
+{
+ while (count--) {
+ *(u8 *)addr = inb(port);
+ addr++;
+ }
+}
+
+/*
+ * Write count 16-bit values from the buffer at addr to I/O port
+ * "port", one outw() per value. The buffer is only read, so it is
+ * taken as a pointer to const; callers with non-const buffers are
+ * unaffected.
+ *
+ * NOTE(review): every value goes through outw() and therefore through
+ * cpu_to_le16() -- confirm that swapping is wanted for string transfers.
+ */
+static inline void __outsw(unsigned long port, const void *addr,
+ unsigned int count)
+{
+ while (count--) {
+ outw(*(const u16 *)addr, port);
+ addr += 2;
+ }
+}
+
+/*
+ * Read count 16-bit values from I/O port "port" into the buffer at
+ * addr, one inw() per value.
+ *
+ * NOTE(review): every value goes through inw() and therefore through
+ * le16_to_cpu() -- confirm that swapping is wanted for string transfers.
+ */
+static inline void __insw(unsigned long port, void *addr, unsigned int count)
+{
+ while (count--) {
+ *(u16 *)addr = inw(port);
+ addr += 2;
+ }
+}
+
+/*
+ * Write count 32-bit values from the buffer at addr to I/O port
+ * "port", one outl() per value. The buffer is only read, so it is
+ * taken as a pointer to const; callers with non-const buffers are
+ * unaffected.
+ *
+ * NOTE(review): every value goes through outl() and therefore through
+ * cpu_to_le32() -- confirm that swapping is wanted for string transfers.
+ */
+static inline void __outsl(unsigned long port, const void *addr,
+ unsigned int count)
+{
+ while (count--) {
+ outl(*(const u32 *)addr, port);
+ addr += 4;
+ }
+}
+
+/*
+ * Read count 32-bit values from I/O port "port" into the buffer at
+ * addr, one inl() per value.
+ *
+ * NOTE(review): every value goes through inl() and therefore through
+ * le32_to_cpu() -- confirm that swapping is wanted for string transfers.
+ */
+static inline void __insl(unsigned long port, void *addr, unsigned int count)
+{
+ while (count--) {
+ *(u32 *)addr = inl(port);
+ addr += 4;
+ }
+}
+
+#define outsb(port, addr, count) __outsb(port, addr, count)
+#define insb(port, addr, count) __insb(port, addr, count)
+#define outsw(port, addr, count) __outsw(port, addr, count)
+#define insw(port, addr, count) __insw(port, addr, count)
+#define outsl(port, addr, count) __outsl(port, addr, count)
+#define insl(port, addr, count) __insl(port, addr, count)
+
+extern void __iomem *__ioremap(unsigned long offset, size_t size,
+ unsigned long flags);
+extern void __iounmap(void __iomem *addr);
+
+/*
+ * ioremap - map bus memory into CPU space
+ * @offset bus address of the memory
+ * @size size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to make
+ * bus memory CPU accessible via the readb/.../writel functions and
+ * the other mmio helpers. The returned address is not guaranteed to
+ * be usable directly as a virtual address.
+ */
+#define ioremap(offset, size) \
+ __ioremap((offset), (size), 0)
+
+#define iounmap(addr) \
+ __iounmap(addr)
+
+#define cached(addr) P1SEGADDR(addr)
+#define uncached(addr) P2SEGADDR(addr)
+
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+#define bus_to_page phys_to_page
+
+#define dma_cache_wback_inv(_start, _size) \
+ flush_dcache_region(_start, _size)
+#define dma_cache_inv(_start, _size) \
+ invalidate_dcache_region(_start, _size)
+#define dma_cache_wback(_start, _size) \
+ clean_dcache_region(_start, _size)
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ *
+ * Currently this expands to its argument unchanged. The argument is
+ * parenthesized so the expansion stays a single expression no matter
+ * what the caller passes in.
+ */
+#define xlate_dev_kmem_ptr(p) (p)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_AVR32_IO_H */
diff --git a/include/asm-avr32/ioctl.h b/include/asm-avr32/ioctl.h
new file mode 100644
index 000000000000..c8472c1398ef
--- /dev/null
+++ b/include/asm-avr32/ioctl.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_IOCTL_H
+#define __ASM_AVR32_IOCTL_H
+
+#include <asm-generic/ioctl.h>
+
+#endif /* __ASM_AVR32_IOCTL_H */
diff --git a/include/asm-avr32/ioctls.h b/include/asm-avr32/ioctls.h
new file mode 100644
index 000000000000..0500426b7186
--- /dev/null
+++ b/include/asm-avr32/ioctls.h
@@ -0,0 +1,83 @@
+#ifndef __ASM_AVR32_IOCTLS_H
+#define __ASM_AVR32_IOCTLS_H
+
+#include <asm/ioctl.h>
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS 0x5401
+#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
+#define TCSETSW 0x5403
+#define TCSETSF 0x5404
+#define TCGETA 0x5405
+#define TCSETA 0x5406
+#define TCSETAW 0x5407
+#define TCSETAF 0x5408
+#define TCSBRK 0x5409
+#define TCXONC 0x540A
+#define TCFLSH 0x540B
+#define TIOCEXCL 0x540C
+#define TIOCNXCL 0x540D
+#define TIOCSCTTY 0x540E
+#define TIOCGPGRP 0x540F
+#define TIOCSPGRP 0x5410
+#define TIOCOUTQ 0x5411
+#define TIOCSTI 0x5412
+#define TIOCGWINSZ 0x5413
+#define TIOCSWINSZ 0x5414
+#define TIOCMGET 0x5415
+#define TIOCMBIS 0x5416
+#define TIOCMBIC 0x5417
+#define TIOCMSET 0x5418
+#define TIOCGSOFTCAR 0x5419
+#define TIOCSSOFTCAR 0x541A
+#define FIONREAD 0x541B
+#define TIOCINQ FIONREAD
+#define TIOCLINUX 0x541C
+#define TIOCCONS 0x541D
+#define TIOCGSERIAL 0x541E
+#define TIOCSSERIAL 0x541F
+#define TIOCPKT 0x5420
+#define FIONBIO 0x5421
+#define TIOCNOTTY 0x5422
+#define TIOCSETD 0x5423
+#define TIOCGETD 0x5424
+#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
+/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
+#define TIOCSBRK 0x5427 /* BSD compatibility */
+#define TIOCCBRK 0x5428 /* BSD compatibility */
+#define TIOCGSID 0x5429 /* Return the session ID of FD */
+#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
+
+#define FIONCLEX 0x5450
+#define FIOCLEX 0x5451
+#define FIOASYNC 0x5452
+#define TIOCSERCONFIG 0x5453
+#define TIOCSERGWILD 0x5454
+#define TIOCSERSWILD 0x5455
+#define TIOCGLCKTRMIOS 0x5456
+#define TIOCSLCKTRMIOS 0x5457
+#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+#define TIOCSERGETLSR 0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
+#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
+#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */
+#define FIOQSIZE 0x5460
+
+/* Used for packet mode */
+#define TIOCPKT_DATA 0
+#define TIOCPKT_FLUSHREAD 1
+#define TIOCPKT_FLUSHWRITE 2
+#define TIOCPKT_STOP 4
+#define TIOCPKT_START 8
+#define TIOCPKT_NOSTOP 16
+#define TIOCPKT_DOSTOP 32
+
+#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
+
+#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/include/asm-avr32/ipcbuf.h b/include/asm-avr32/ipcbuf.h
new file mode 100644
index 000000000000..1552c9698f5e
--- /dev/null
+++ b/include/asm-avr32/ipcbuf.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_AVR32_IPCBUF_H
+#define __ASM_AVR32_IPCBUF_H
+
+/*
+* The ipc64_perm structure for AVR32 architecture.
+* Note extra padding because this structure is passed back and forth
+* between kernel and user space.
+*
+* Pad space is left for:
+* - 32-bit mode_t and seq
+* - 2 miscellaneous 32-bit values
+*/
+
+struct ipc64_perm
+{
+ __kernel_key_t key;
+ __kernel_uid32_t uid;
+ __kernel_gid32_t gid;
+ __kernel_uid32_t cuid;
+ __kernel_gid32_t cgid;
+ __kernel_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned short __pad2;
+ unsigned long __unused1;
+ unsigned long __unused2;
+};
+
+#endif /* __ASM_AVR32_IPCBUF_H */
diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h
new file mode 100644
index 000000000000..f7e725707dd7
--- /dev/null
+++ b/include/asm-avr32/irq.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_AVR32_IRQ_H
+#define __ASM_AVR32_IRQ_H
+
+#define NR_INTERNAL_IRQS 64
+#define NR_EXTERNAL_IRQS 64
+#define NR_IRQS (NR_INTERNAL_IRQS + NR_EXTERNAL_IRQS)
+
+#define irq_canonicalize(i) (i) /* identity: the IRQ number is returned unchanged */
+
+#endif /* __ASM_AVR32_IRQ_H */
diff --git a/include/asm-avr32/irqflags.h b/include/asm-avr32/irqflags.h
new file mode 100644
index 000000000000..93570daac38a
--- /dev/null
+++ b/include/asm-avr32/irqflags.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_IRQFLAGS_H
+#define __ASM_AVR32_IRQFLAGS_H
+
+#include <asm/sysreg.h>
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+ return sysreg_read(SR);
+}
+
+#define raw_local_save_flags(x) \
+ do { (x) = __raw_local_save_flags(); } while (0)
+
+/*
+ * This will restore ALL status register flags, not only the interrupt
+ * mask flag.
+ *
+ * The empty asm statement informs the compiler of this fact while
+ * also serving as a barrier.
+ */
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+ sysreg_write(SR, flags);
+ asm volatile("" : : : "memory", "cc");
+}
+
+static inline void raw_local_irq_disable(void)
+{
+ asm volatile("ssrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+ asm volatile("csrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory");
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+ return (flags & SYSREG_BIT(GM)) != 0;
+}
+
+static inline int raw_irqs_disabled(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ return raw_irqs_disabled_flags(flags);
+}
+
+static inline unsigned long __raw_local_irq_save(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ raw_local_irq_disable();
+
+ return flags;
+}
+
+#define raw_local_irq_save(flags) \
+ do { (flags) = __raw_local_irq_save(); } while (0)
+
+#endif /* __ASM_AVR32_IRQFLAGS_H */
diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h
new file mode 100644
index 000000000000..f583b643ffb2
--- /dev/null
+++ b/include/asm-avr32/kdebug.h
@@ -0,0 +1,38 @@
+#ifndef __ASM_AVR32_KDEBUG_H
+#define __ASM_AVR32_KDEBUG_H
+
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args {
+ struct pt_regs *regs;
+ int trapnr;
+};
+
+int register_die_notifier(struct notifier_block *nb);
+int unregister_die_notifier(struct notifier_block *nb);
+int register_page_fault_notifier(struct notifier_block *nb);
+int unregister_page_fault_notifier(struct notifier_block *nb);
+extern struct atomic_notifier_head avr32_die_chain;
+
+/* Grossly misnamed. */
+enum die_val {
+ DIE_FAULT,
+ DIE_BREAKPOINT,
+ DIE_SSTEP,
+ DIE_PAGE_FAULT,
+};
+
+static inline int notify_die(enum die_val val, struct pt_regs *regs,
+ int trap, int sig) /* NOTE(review): sig is never read below; struct die_args has no field for it */
+{
+ struct die_args args = {
+ .regs = regs,
+ .trapnr = trap,
+ };
+
+ return atomic_notifier_call_chain(&avr32_die_chain, val, &args); /* hands val and &args to the avr32_die_chain notifiers */
+}
+
+#endif /* __ASM_AVR32_KDEBUG_H */
diff --git a/include/asm-avr32/kmap_types.h b/include/asm-avr32/kmap_types.h
new file mode 100644
index 000000000000..b7f5c6870107
--- /dev/null
+++ b/include/asm-avr32/kmap_types.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_AVR32_KMAP_TYPES_H
+#define __ASM_AVR32_KMAP_TYPES_H
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+enum km_type {
+D(0) KM_BOUNCE_READ,
+D(1) KM_SKB_SUNRPC_DATA,
+D(2) KM_SKB_DATA_SOFTIRQ,
+D(3) KM_USER0,
+D(4) KM_USER1,
+D(5) KM_BIO_SRC_IRQ,
+D(6) KM_BIO_DST_IRQ,
+D(7) KM_PTE0,
+D(8) KM_PTE1,
+D(9) KM_PTE2,
+D(10) KM_IRQ0,
+D(11) KM_IRQ1,
+D(12) KM_SOFTIRQ0,
+D(13) KM_SOFTIRQ1,
+D(14) KM_TYPE_NR
+};
+
+#undef D
+
+#endif /* __ASM_AVR32_KMAP_TYPES_H */
diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h
new file mode 100644
index 000000000000..09a5cbe2f896
--- /dev/null
+++ b/include/asm-avr32/kprobes.h
@@ -0,0 +1,34 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_KPROBES_H
+#define __ASM_AVR32_KPROBES_H
+
+#include <linux/types.h>
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */
+#define MAX_INSN_SIZE 2
+
+#define ARCH_INACTIVE_KPROBE_COUNT 1
+
+#define arch_remove_kprobe(p) do { } while (0)
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+ kprobe_opcode_t insn[MAX_INSN_SIZE];
+};
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
+#define flush_insn_slot(p) do { } while (0)
+
+#endif /* __ASM_AVR32_KPROBES_H */
diff --git a/include/asm-avr32/linkage.h b/include/asm-avr32/linkage.h
new file mode 100644
index 000000000000..f7b285e910d4
--- /dev/null
+++ b/include/asm-avr32/linkage.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .balign 2
+#define __ALIGN_STR ".balign 2"
+
+#endif /* __ASM_LINKAGE_H */
diff --git a/include/asm-avr32/local.h b/include/asm-avr32/local.h
new file mode 100644
index 000000000000..1c1619694da3
--- /dev/null
+++ b/include/asm-avr32/local.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_LOCAL_H
+#define __ASM_AVR32_LOCAL_H
+
+#include <asm-generic/local.h>
+
+#endif /* __ASM_AVR32_LOCAL_H */
diff --git a/include/asm-avr32/mach/serial_at91.h b/include/asm-avr32/mach/serial_at91.h
new file mode 100644
index 000000000000..1290bb32802d
--- /dev/null
+++ b/include/asm-avr32/mach/serial_at91.h
@@ -0,0 +1,33 @@
+/*
+ * linux/include/asm-avr32/mach/serial_at91.h
+ *
+ * Based on serial_sa1100.h by Nicolas Pitre
+ *
+ * Copyright (C) 2002 ATMEL Rousset
+ *
+ * Low level machine dependent UART functions.
+ */
+
+struct uart_port;
+
+/*
+ * This is a temporary structure for registering these
+ * functions; it is intended to be discarded after boot.
+ */
+struct at91_port_fns {
+ void (*set_mctrl)(struct uart_port *, u_int);
+ u_int (*get_mctrl)(struct uart_port *);
+ void (*enable_ms)(struct uart_port *);
+ void (*pm)(struct uart_port *, u_int, u_int);
+ int (*set_wake)(struct uart_port *, u_int);
+ int (*open)(struct uart_port *);
+ void (*close)(struct uart_port *);
+};
+
+#if defined(CONFIG_SERIAL_AT91)
+void at91_register_uart_fns(struct at91_port_fns *fns);
+#else
+#define at91_register_uart_fns(fns) do { } while (0)
+#endif
+
+
diff --git a/include/asm-avr32/mman.h b/include/asm-avr32/mman.h
new file mode 100644
index 000000000000..648f91e7187a
--- /dev/null
+++ b/include/asm-avr32/mman.h
@@ -0,0 +1,17 @@
+#ifndef __ASM_AVR32_MMAN_H__
+#define __ASM_AVR32_MMAN_H__
+
+#include <asm-generic/mman.h>
+
+#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
+#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
+#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
+#define MAP_LOCKED 0x2000 /* pages are locked */
+#define MAP_NORESERVE 0x4000 /* don't check for reservations */
+#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */
+#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+
+#define MCL_CURRENT 1 /* lock all current mappings */
+#define MCL_FUTURE 2 /* lock all future mappings */
+
+#endif /* __ASM_AVR32_MMAN_H__ */
diff --git a/include/asm-avr32/mmu.h b/include/asm-avr32/mmu.h
new file mode 100644
index 000000000000..60c2d2650d32
--- /dev/null
+++ b/include/asm-avr32/mmu.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_AVR32_MMU_H
+#define __ASM_AVR32_MMU_H
+
+/* Default "unsigned long" context */
+typedef unsigned long mm_context_t;
+
+#define MMU_ITLB_ENTRIES 64
+#define MMU_DTLB_ENTRIES 64
+
+#endif /* __ASM_AVR32_MMU_H */
diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h
new file mode 100644
index 000000000000..31add1ae8089
--- /dev/null
+++ b/include/asm-avr32/mmu_context.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * ASID handling taken from SH implementation.
+ * Copyright (C) 1999 Niibe Yutaka
+ * Copyright (C) 2003 Paul Mundt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_MMU_CONTEXT_H
+#define __ASM_AVR32_MMU_CONTEXT_H
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/sysreg.h>
+
+/*
+ * The MMU "context" consists of two things:
+ * (a) TLB cache version
+ * (b) ASID (Address Space IDentifier)
+ */
+#define MMU_CONTEXT_ASID_MASK 0x000000ff
+#define MMU_CONTEXT_VERSION_MASK 0xffffff00
+#define MMU_CONTEXT_FIRST_VERSION 0x00000100
+#define NO_CONTEXT 0
+
+#define MMU_NO_ASID 0x100
+
+/* Virtual Page Number mask */
+#define MMU_VPN_MASK 0xfffff000
+
+/* Cache of MMU context last used */
+extern unsigned long mmu_context_cache;
+
+/*
+ * Get MMU context if needed
+ */
+static inline void
+get_mmu_context(struct mm_struct *mm)
+{
+ unsigned long mc = mmu_context_cache;
+
+ if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0)
+ /* It's up to date, do nothing */
+ return;
+
+ /* It's old, we need to get new context with new version */
+ mc = ++mmu_context_cache;
+ if (!(mc & MMU_CONTEXT_ASID_MASK)) {
+ /*
+ * We have exhausted all ASIDs of this version.
+ * Flush the TLB and start new cycle.
+ */
+ flush_tlb_all();
+ /*
+ * Fix version. Note that we avoid version #0
+ * to distinguish NO_CONTEXT.
+ */
+ if (!mc)
+ mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION;
+ }
+ mm->context = mc;
+}
+
+/*
+ * Initialize the context related info for a new mm_struct
+ * instance.
+ */
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ mm->context = NO_CONTEXT;
+ return 0;
+}
+
+/*
+ * Destroy context related info for an mm_struct that is about
+ * to be put to rest.
+ */
+static inline void destroy_context(struct mm_struct *mm)
+{
+ /* Do nothing */
+}
+
+static inline void set_asid(unsigned long asid)
+{
+ /* XXX: We're destroying TLBEHI[8:31] */
+ sysreg_write(TLBEHI, asid & MMU_CONTEXT_ASID_MASK);
+ cpu_sync_pipeline();
+}
+
+static inline unsigned long get_asid(void)
+{
+ unsigned long asid;
+
+ asid = sysreg_read(TLBEHI);
+ return asid & MMU_CONTEXT_ASID_MASK;
+}
+
+static inline void activate_context(struct mm_struct *mm)
+{
+ get_mmu_context(mm);
+ set_asid(mm->context & MMU_CONTEXT_ASID_MASK);
+}
+
+static inline void switch_mm(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (likely(prev != next)) {
+ unsigned long __pgdir = (unsigned long)next->pgd;
+
+ sysreg_write(PTBR, __pgdir);
+ activate_context(next);
+ }
+}
+
+#define deactivate_mm(tsk,mm) do { } while(0)
+
+#define activate_mm(prev, next) switch_mm((prev), (next), NULL)
+
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+
+static inline void enable_mmu(void)
+{
+ sysreg_write(MMUCR, (SYSREG_BIT(MMUCR_S)
+ | SYSREG_BIT(E)
+ | SYSREG_BIT(MMUCR_I)));
+ nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+
+ if (mmu_context_cache == NO_CONTEXT)
+ mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
+
+ set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK);
+}
+
+static inline void disable_mmu(void)
+{
+ sysreg_write(MMUCR, SYSREG_BIT(MMUCR_S));
+}
+
+#endif /* __ASM_AVR32_MMU_CONTEXT_H */
diff --git a/include/asm-avr32/module.h b/include/asm-avr32/module.h
new file mode 100644
index 000000000000..451444538a1b
--- /dev/null
+++ b/include/asm-avr32/module.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_AVR32_MODULE_H
+#define __ASM_AVR32_MODULE_H
+
+struct mod_arch_syminfo {
+ unsigned long got_offset;
+ int got_initialized;
+};
+
+struct mod_arch_specific {
+ /* Starting offset of got in the module core memory. */
+ unsigned long got_offset;
+ /* Size of the got. */
+ unsigned long got_size;
+ /* Number of symbols in syminfo. */
+ int nsyms;
+ /* Additional symbol information (got offsets). */
+ struct mod_arch_syminfo *syminfo;
+};
+
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+
+#define MODULE_PROC_FAMILY "AVR32v1"
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
+
+#endif /* __ASM_AVR32_MODULE_H */
diff --git a/include/asm-avr32/msgbuf.h b/include/asm-avr32/msgbuf.h
new file mode 100644
index 000000000000..ac18bc4da7f7
--- /dev/null
+++ b/include/asm-avr32/msgbuf.h
@@ -0,0 +1,31 @@
+#ifndef __ASM_AVR32_MSGBUF_H
+#define __ASM_AVR32_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+ struct ipc64_perm msg_perm;
+ __kernel_time_t msg_stime; /* last msgsnd time */
+ unsigned long __unused1;
+ __kernel_time_t msg_rtime; /* last msgrcv time */
+ unsigned long __unused2;
+ __kernel_time_t msg_ctime; /* last change time */
+ unsigned long __unused3;
+ unsigned long msg_cbytes; /* current number of bytes on queue */
+ unsigned long msg_qnum; /* number of messages in queue */
+ unsigned long msg_qbytes; /* max number of bytes on queue */
+ __kernel_pid_t msg_lspid; /* pid of last msgsnd */
+ __kernel_pid_t msg_lrpid; /* last receive pid */
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+#endif /* __ASM_AVR32_MSGBUF_H */
diff --git a/include/asm-avr32/mutex.h b/include/asm-avr32/mutex.h
new file mode 100644
index 000000000000..458c1f7fbc18
--- /dev/null
+++ b/include/asm-avr32/mutex.h
@@ -0,0 +1,9 @@
+/*
+ * Pull in the generic implementation for the mutex fastpath.
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+
+#include <asm-generic/mutex-dec.h>
diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h
new file mode 100644
index 000000000000..f0a26de06cab
--- /dev/null
+++ b/include/asm-avr32/namei.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_AVR32_NAMEI_H
+#define __ASM_AVR32_NAMEI_H
+
+/* This dummy routine may be changed to something useful */
+#define __emul_prefix() NULL
+
+#endif /* __ASM_AVR32_NAMEI_H */
diff --git a/include/asm-avr32/numnodes.h b/include/asm-avr32/numnodes.h
new file mode 100644
index 000000000000..0b864d7ce330
--- /dev/null
+++ b/include/asm-avr32/numnodes.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_AVR32_NUMNODES_H
+#define __ASM_AVR32_NUMNODES_H
+
+/* Max 4 nodes */
+#define NODES_SHIFT 2
+
+#endif /* __ASM_AVR32_NUMNODES_H */
diff --git a/include/asm-avr32/ocd.h b/include/asm-avr32/ocd.h
new file mode 100644
index 000000000000..46f73180a127
--- /dev/null
+++ b/include/asm-avr32/ocd.h
@@ -0,0 +1,78 @@
+/*
+ * AVR32 OCD Registers
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_OCD_H
+#define __ASM_AVR32_OCD_H
+
+/* Debug Registers */
+#define DBGREG_DID 0
+#define DBGREG_DC 8
+#define DBGREG_DS 16
+#define DBGREG_RWCS 28
+#define DBGREG_RWA 36
+#define DBGREG_RWD 40
+#define DBGREG_WT 44
+#define DBGREG_DTC 52
+#define DBGREG_DTSA0 56
+#define DBGREG_DTSA1 60
+#define DBGREG_DTEA0 72
+#define DBGREG_DTEA1 76
+#define DBGREG_BWC0A 88
+#define DBGREG_BWC0B 92
+#define DBGREG_BWC1A 96
+#define DBGREG_BWC1B 100
+#define DBGREG_BWC2A 104
+#define DBGREG_BWC2B 108
+#define DBGREG_BWC3A 112
+#define DBGREG_BWC3B 116
+#define DBGREG_BWA0A 120
+#define DBGREG_BWA0B 124
+#define DBGREG_BWA1A 128
+#define DBGREG_BWA1B 132
+#define DBGREG_BWA2A 136
+#define DBGREG_BWA2B 140
+#define DBGREG_BWA3A 144
+#define DBGREG_BWA3B 148
+#define DBGREG_BWD3A 152 /* 4-byte aligned like every other offset; BWD3B follows at +4 */
+#define DBGREG_BWD3B 156
+
+#define DBGREG_PID 284
+
+#define SABAH_OCD 0x01
+#define SABAH_ICACHE 0x02
+#define SABAH_MEM_CACHED 0x04
+#define SABAH_MEM_UNCACHED 0x05
+
+/* Fields in the Development Control register */
+#define DC_SS_BIT 8
+
+#define DC_SS (1 << DC_SS_BIT)
+#define DC_DBE (1 << 13)
+#define DC_RID (1 << 27)
+#define DC_ORP (1 << 28)
+#define DC_MM (1 << 29)
+#define DC_RES (1 << 30)
+
+/* Fields in the Development Status register */
+#define DS_SSS (1 << 0)
+#define DS_SWB (1 << 1)
+#define DS_HWB (1 << 2)
+#define DS_BP_SHIFT 8
+#define DS_BP_MASK (0xff << DS_BP_SHIFT)
+
+#define __mfdr(addr) \
+({ \
+ register unsigned long value; \
+ asm volatile("mfdr %0, %1" : "=r"(value) : "i"(addr)); \
+ value; \
+})
+#define __mtdr(addr, value) \
+ asm volatile("mtdr %0, %1" : : "i"(addr), "r"(value))
+
+#endif /* __ASM_AVR32_OCD_H */
diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h
new file mode 100644
index 000000000000..0f630b3e9932
--- /dev/null
+++ b/include/asm-avr32/page.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PAGE_H
+#define __ASM_AVR32_PAGE_H
+
+#ifdef __KERNEL__
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT 12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PTE_MASK PAGE_MASK
+
+#ifndef __ASSEMBLY__
+
+#include <asm/addrspace.h>
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x) ((x).pte)
+#define pgd_val(x) ((x).pgd)
+#define pgprot_val(x) ((x).pgprot)
+
+#define __pte(x) ((pte_t) { (x) })
+#define __pgd(x) ((pgd_t) { (x) })
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+/* FIXME: These should be removed soon */
+extern unsigned long memory_start, memory_end;
+
+/* Pure 2^n version of get_order: for size >= 1, the smallest order with (PAGE_SIZE << order) >= size */
+static inline int get_order(unsigned long size)
+{
+ unsigned lz;
+
+ size = (size - 1) >> PAGE_SHIFT; /* (pages needed) - 1; NOTE(review): size == 0 wraps to ULONG_MAX first */
+ asm("clz %0, %1" : "=r"(lz) : "r"(size)); /* lz = leading zero bits; assumes clz gives 32 for 0 - TODO confirm */
+ return 32 - lz; /* bit length of (pages needed - 1) */
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/* Align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+
+/*
+ * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff
+ * permanently to the physical addresses 0x00000000 -> 0x1fffffff when
+ * segmentation is enabled. We want to make use of this in order to
+ * minimize TLB pressure.
+ */
+#define PAGE_OFFSET (0x80000000UL)
+
+/*
+ * ALSA uses virt_to_page() on DMA pages, which I'm not entirely sure
+ * is a good idea. Anyway, we can't simply subtract PAGE_OFFSET here
+ * in that case, so we'll have to mask out the three most significant
+ * bits of the address instead...
+ *
+ * What's the difference between __pa() and virt_to_phys() anyway?
+ */
+#define __pa(x) PHYSADDR(x)
+#define __va(x) ((void *)(P1SEGADDR(x)))
+
+#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT)
+
+#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) /* (phys) parenthesized so an expression argument is shifted as a whole */
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+
+#define PHYS_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT)
+
+#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET))
+#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET)
+#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+/*
+ * Memory above this physical address will be considered highmem.
+ */
+#define HIGHMEM_START 0x20000000UL
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_AVR32_PAGE_H */
diff --git a/include/asm-avr32/param.h b/include/asm-avr32/param.h
new file mode 100644
index 000000000000..34bc8d4c3b29
--- /dev/null
+++ b/include/asm-avr32/param.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_AVR32_PARAM_H
+#define __ASM_AVR32_PARAM_H
+
+#ifdef __KERNEL__
+# define HZ CONFIG_HZ
+# define USER_HZ 100 /* User interfaces are in "ticks" */
+# define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */
+#endif
+
+#ifndef HZ
+# define HZ 100
+#endif
+
+/* TODO: Should be configurable */
+#define EXEC_PAGESIZE 4096
+
+#ifndef NOGROUP
+# define NOGROUP (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64
+
+#endif /* __ASM_AVR32_PARAM_H */
diff --git a/include/asm-avr32/pci.h b/include/asm-avr32/pci.h
new file mode 100644
index 000000000000..0f5f134b896a
--- /dev/null
+++ b/include/asm-avr32/pci.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_AVR32_PCI_H__
+#define __ASM_AVR32_PCI_H__
+
+/* We don't support PCI yet, but some drivers require this file anyway */
+
+#define PCI_DMA_BUS_IS_PHYS (1)
+
+#endif /* __ASM_AVR32_PCI_H__ */
diff --git a/include/asm-avr32/percpu.h b/include/asm-avr32/percpu.h
new file mode 100644
index 000000000000..69227b4cd0d4
--- /dev/null
+++ b/include/asm-avr32/percpu.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_PERCPU_H
+#define __ASM_AVR32_PERCPU_H
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_AVR32_PERCPU_H */
diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h
new file mode 100644
index 000000000000..7492cfb92ced
--- /dev/null
+++ b/include/asm-avr32/pgalloc.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PGALLOC_H
+#define __ASM_AVR32_PGALLOC_H
+
+#include <asm/processor.h>
+#include <linux/threads.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
+
+static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ struct page *pte)
+{
+ set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
+}
+
+/*
+ * Allocate and free page tables
+ */
+static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t));
+ pgd_t *pgd = (pgd_t *)kmalloc(pgd_size, GFP_KERNEL);
+
+ if (pgd)
+ memset(pgd, 0, pgd_size);
+
+ return pgd;
+}
+
+static inline void pgd_free(pgd_t *pgd)
+{
+ kfree(pgd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address) /* neither mm nor address is read in the body */
+{
+ int count = 0;
+ pte_t *pte;
+
+ do {
+ pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT);
+ if (pte)
+ clear_page(pte); /* hand back a zeroed page table */
+ else {
+ current->state = TASK_UNINTERRUPTIBLE; /* allocation failed: sleep before trying again */
+ schedule_timeout(HZ);
+ }
+ } while (!pte && (count++ < 10)); /* at most 10 retries, i.e. 11 attempts in total */
+
+ return pte; /* still NULL if every attempt failed */
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long address) /* neither mm nor address is read in the body */
+{
+ int count = 0;
+ struct page *pte;
+
+ do {
+ pte = alloc_pages(GFP_KERNEL, 0); /* order 0: a single page */
+ if (pte)
+ clear_page(page_address(pte)); /* zero the page through its kernel mapping */
+ else {
+ current->state = TASK_UNINTERRUPTIBLE; /* allocation failed: sleep before trying again */
+ schedule_timeout(HZ);
+ }
+ } while (!pte && (count++ < 10)); /* at most 10 retries, i.e. 11 attempts in total */
+
+ return pte; /* still NULL if every attempt failed */
+}
+
+static inline void pte_free_kernel(pte_t *pte)
+{
+ free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct page *pte)
+{
+ __free_page(pte);
+}
+
+#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+
+#define check_pgt_cache() do { } while(0)
+
+#endif /* __ASM_AVR32_PGALLOC_H */
diff --git a/include/asm-avr32/pgtable-2level.h b/include/asm-avr32/pgtable-2level.h
new file mode 100644
index 000000000000..425dd567b5b9
--- /dev/null
+++ b/include/asm-avr32/pgtable-2level.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
+#define __ASM_AVR32_PGTABLE_2LEVEL_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/*
+ * Traditional 2-level paging structure
+ */
+#define PGDIR_SHIFT 22
+#define PTRS_PER_PGD 1024
+
+#define PTRS_PER_PTE 1024
+
+#ifndef __ASSEMBLY__
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep, pteval)
+
+/*
+ * (pmds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
+
+#define pte_pfn(x) ((unsigned long)(((x).pte >> PAGE_SHIFT)))
+#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PGTABLE_2LEVEL_H */
diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h
new file mode 100644
index 000000000000..6b8ca9db2bd5
--- /dev/null
+++ b/include/asm-avr32/pgtable.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PGTABLE_H
+#define __ASM_AVR32_PGTABLE_H
+
+#include <asm/addrspace.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Use two-level page tables just as the i386 (without PAE)
+ */
+#include <asm/pgtable-2level.h>
+
+/*
+ * The following code might need some cleanup when the values are
+ * final...
+ */
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS 0
+
+#define PTE_PHYS_MASK 0x1ffff000
+
+#ifndef __ASSEMBLY__
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern void paging_init(void);
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used for
+ * zero-mapped memory areas etc.
+ */
+extern struct page *empty_zero_page;
+#define ZERO_PAGE(vaddr) (empty_zero_page)
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8 MiB value just means that there will be an 8 MiB "hole"
+ * after the uncached physical memory (P2 segment) until the vmalloc
+ * area starts. That means that any out-of-bounds memory accesses will
+ * hopefully be caught; we don't know if the end of the P1/P2 segments
+ * are actually used for anything, but it is anyway safer to let the
+ * MMU catch these kinds of errors than to rely on the memory bus.
+ *
+ * A "hole" of the same size is added to the end of the P3 segment as
+ * well. It might seem wasteful to use 16 MiB of virtual address space
+ * on this, but we do have 512 MiB of it...
+ *
+ * The vmalloc() routines leave a hole of 4 KiB between each vmalloced
+ * area for the same reason.
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+#define VMALLOC_START (P3SEG + VMALLOC_OFFSET)
+#define VMALLOC_END (P4SEG - VMALLOC_OFFSET)
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Page flags. Some of these flags are not directly supported by
+ * hardware, so we have to emulate them.
+ */
+#define _TLBEHI_BIT_VALID 9
+#define _TLBEHI_VALID (1 << _TLBEHI_BIT_VALID)
+
+#define _PAGE_BIT_WT 0 /* W-bit : write-through */
+#define _PAGE_BIT_DIRTY 1 /* D-bit : page changed */
+#define _PAGE_BIT_SZ0 2 /* SZ0-bit : Size of page */
+#define _PAGE_BIT_SZ1 3 /* SZ1-bit : Size of page */
+#define _PAGE_BIT_EXECUTE 4 /* X-bit : execute access allowed */
+#define _PAGE_BIT_RW 5 /* AP0-bit : write access allowed */
+#define _PAGE_BIT_USER 6 /* AP1-bit : user space access allowed */
+#define _PAGE_BIT_BUFFER 7 /* B-bit : bufferable */
+#define _PAGE_BIT_GLOBAL 8 /* G-bit : global (ignore ASID) */
+#define _PAGE_BIT_CACHABLE 9 /* C-bit : cachable */
+
+/* If we drop support for 1K pages, we get two extra bits */
+#define _PAGE_BIT_PRESENT 10
+#define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */
+
+/* The following flags are only valid when !PRESENT */
+#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */
+
+#define _PAGE_WT (1 << _PAGE_BIT_WT)
+#define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY)
+#define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE)
+#define _PAGE_RW (1 << _PAGE_BIT_RW)
+#define _PAGE_USER (1 << _PAGE_BIT_USER)
+#define _PAGE_BUFFER (1 << _PAGE_BIT_BUFFER)
+#define _PAGE_GLOBAL (1 << _PAGE_BIT_GLOBAL)
+#define _PAGE_CACHABLE (1 << _PAGE_BIT_CACHABLE)
+
+/* Software flags */
+#define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED)
+#define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT)
+#define _PAGE_FILE (1 << _PAGE_BIT_FILE)
+
+/*
+ * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is
+ * usually called _PAGE_PROTNONE on other architectures.
+ *
+ * XXX: Find out if _PAGE_PROTNONE is equivalent with !_PAGE_USER. If
+ * so, we can encode all possible page sizes (although we can't really
+ * support 1K pages anyway due to the _PAGE_PRESENT and _PAGE_ACCESSED
+ * bits)
+ *
+ */
+#define _PAGE_TYPE_MASK ((1 << _PAGE_BIT_SZ0) | (1 << _PAGE_BIT_SZ1))
+#define _PAGE_TYPE_NONE (0 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_SMALL (1 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_MEDIUM (2 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_LARGE (3 << _PAGE_BIT_SZ0)
+
+/*
+ * Mask which drops software flags. We currently can't handle more than
+ * 512 MiB of physical memory, so we can use bits 29-31 for other
+ * stuff. With a fixed 4K page size, we can use bits 10-11 as well as
+ * bits 2-3 (SZ)
+ */
+#define _PAGE_FLAGS_HARDWARE_MASK 0xfffff3ff
+
+#define _PAGE_FLAGS_CACHE_MASK (_PAGE_CACHABLE | _PAGE_BUFFER | _PAGE_WT)
+
+/* TODO: Check for saneness */
+/* User-mode page table flags (to be set in a pgd or pmd entry) */
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \
+ | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+/* Kernel-mode page table flags */
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \
+ | _PAGE_ACCESSED | _PAGE_DIRTY)
+/* Flags that may be modified by software */
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY \
+ | _PAGE_FLAGS_CACHE_MASK)
+
+#define _PAGE_FLAGS_READ (_PAGE_CACHABLE | _PAGE_BUFFER)
+#define _PAGE_FLAGS_WRITE (_PAGE_FLAGS_READ | _PAGE_RW | _PAGE_DIRTY)
+
+#define _PAGE_NORMAL(x) __pgprot((x) | _PAGE_PRESENT | _PAGE_TYPE_SMALL \
+ | _PAGE_ACCESSED)
+
+#define PAGE_NONE (_PAGE_ACCESSED | _PAGE_TYPE_NONE)
+#define PAGE_READ (_PAGE_FLAGS_READ | _PAGE_USER)
+#define PAGE_EXEC (_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_USER)
+#define PAGE_WRITE (_PAGE_FLAGS_WRITE | _PAGE_USER)
+#define PAGE_KERNEL _PAGE_NORMAL(_PAGE_FLAGS_WRITE | _PAGE_EXECUTE | _PAGE_GLOBAL)
+#define PAGE_KERNEL_RO _PAGE_NORMAL(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_GLOBAL)
+
+#define _PAGE_P(x) _PAGE_NORMAL((x) & ~(_PAGE_RW | _PAGE_DIRTY))
+#define _PAGE_S(x) _PAGE_NORMAL(x)
+
+#define PAGE_COPY _PAGE_P(PAGE_WRITE | PAGE_READ)
+
+#ifndef __ASSEMBLY__
+/*
+ * The hardware supports flags for write- and execute access. Read is
+ * always allowed if the page is loaded into the TLB, so the "-w-",
+ * "--x" and "-wx" mappings are implemented as "rw-", "r-x" and "rwx",
+ * respectively.
+ *
+ * The "---" case is handled by software; the page will simply not be
+ * loaded into the TLB if the page type is _PAGE_TYPE_NONE.
+ */
+
+#define __P000 __pgprot(PAGE_NONE)
+#define __P001 _PAGE_P(PAGE_READ)
+#define __P010 _PAGE_P(PAGE_WRITE)
+#define __P011 _PAGE_P(PAGE_WRITE | PAGE_READ)
+#define __P100 _PAGE_P(PAGE_EXEC)
+#define __P101 _PAGE_P(PAGE_EXEC | PAGE_READ)
+#define __P110 _PAGE_P(PAGE_EXEC | PAGE_WRITE)
+#define __P111 _PAGE_P(PAGE_EXEC | PAGE_WRITE | PAGE_READ)
+
+#define __S000 __pgprot(PAGE_NONE)
+#define __S001 _PAGE_S(PAGE_READ)
+#define __S010 _PAGE_S(PAGE_WRITE)
+#define __S011 _PAGE_S(PAGE_WRITE | PAGE_READ)
+#define __S100 _PAGE_S(PAGE_EXEC)
+#define __S101 _PAGE_S(PAGE_EXEC | PAGE_READ)
+#define __S110 _PAGE_S(PAGE_EXEC | PAGE_WRITE)
+#define __S111 _PAGE_S(PAGE_EXEC | PAGE_WRITE | PAGE_READ)
+
+#define pte_none(x) (!pte_val(x))
+#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
+
+#define pte_clear(mm,addr,xp) \
+ do { \
+ set_pte_at(mm, addr, xp, __pte(0)); \
+ } while (0)
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_USER;
+}
+static inline int pte_write(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_RW;
+}
+static inline int pte_exec(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_EXECUTE;
+}
+static inline int pte_dirty(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_DIRTY;
+}
+static inline int pte_young(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_ACCESSED;
+}
+
+/*
+ * The following only work if pte_present() is not true.
+ */
+static inline int pte_file(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_FILE;
+}
+
+/* Mutator functions for PTE bits */
+static inline pte_t pte_rdprotect(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER));
+ return pte;
+}
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW));
+ return pte;
+}
+static inline pte_t pte_exprotect(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_EXECUTE));
+ return pte;
+}
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY));
+ return pte;
+}
+static inline pte_t pte_mkold(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED));
+ return pte;
+}
+static inline pte_t pte_mkread(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER));
+ return pte;
+}
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW));
+ return pte;
+}
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE));
+ return pte;
+}
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY));
+ return pte;
+}
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED));
+ return pte;
+}
+
+#define pmd_none(x) (!pmd_val(x))
+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) \
+ != _KERNPG_TABLE)
+
+/*
+ * Permanent address of a page. We don't support highmem, so this is
+ * trivial.
+ */
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+#define pte_page(x) phys_to_page(pte_val(x) & PTE_PHYS_MASK)
+
+/*
+ * Mark the prot value as uncacheable and unbufferable
+ */
+#define pgprot_noncached(prot) \
+ __pgprot(pgprot_val(prot) & ~(_PAGE_BUFFER | _PAGE_CACHABLE))
+
+/*
+ * Mark the prot value as uncacheable but bufferable
+ */
+#define pgprot_writecombine(prot) \
+ __pgprot((pgprot_val(prot) & ~_PAGE_CACHABLE) | _PAGE_BUFFER)
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * extern pte_t mk_pte(struct page *page, pgprot_t pgprot)
+ */
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK)
+ | pgprot_val(newprot)));
+ return pte;
+}
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_page_vaddr(pmd) \
+ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+#define pmd_page(pmd) (phys_to_page(pmd_val(pmd)))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+#define pgd_offset_current(address) \
+ ((pgd_t *)__mfsr(SYSREG_PTBR) + pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the third-level page table.. */
+#define pte_index(address) \
+ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* slot within a page table */
+#define pte_offset(dir, address) \
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
+#define pte_offset_kernel(dir, address) \
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
+#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address)
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+
+struct vm_area_struct;
+extern void update_mmu_cache(struct vm_area_struct * vma,
+ unsigned long address, pte_t pte);
+
+/*
+ * Encode and decode a swap entry
+ *
+ * Constraints:
+ * _PAGE_FILE at bit 0
+ * _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE)
+ * _PAGE_PRESENT at bit 10
+ *
+ * We encode the type into bits 4-9 and offset into bits 11-31. This
+ * gives us a 21 bits offset, or 2**21 * 4K = 8G usable swap space per
+ * device, and 64 possible types.
+ *
+ * NOTE: We should set ZEROs at the position of _PAGE_PRESENT
+ * and _PAGE_PROTNONE bits
+ */
+#define __swp_type(x) (((x).val >> 4) & 0x3f)
+#define __swp_offset(x) ((x).val >> 11)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 11) })
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+/*
+ * Encode and decode a nonlinear file mapping entry. We have to
+ * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't
+ * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?)
+ */
+#define PTE_FILE_MAX_BITS 30
+#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \
+ | ((pte_val(pte) >> 11) << 9))
+#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \
+ | (((off) >> 9) << 11) \
+ | _PAGE_FILE) })
+
+typedef pte_t *pte_addr_t;
+
+#define kern_addr_valid(addr) (1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#define MK_IOSPACE_PFN(space, pfn) (pfn)
+#define GET_IOSPACE(pfn) 0
+#define GET_PFN(pfn) (pfn)
+
+/* No page table caches to initialize (?) */
+#define pgtable_cache_init() do { } while(0)
+
+#include <asm-generic/pgtable.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PGTABLE_H */
diff --git a/include/asm-avr32/poll.h b/include/asm-avr32/poll.h
new file mode 100644
index 000000000000..736e29755dfc
--- /dev/null
+++ b/include/asm-avr32/poll.h
@@ -0,0 +1,27 @@
+#ifndef __ASM_AVR32_POLL_H
+#define __ASM_AVR32_POLL_H
+
+/* These are specified by iBCS2 */
+#define POLLIN 0x0001
+#define POLLPRI 0x0002
+#define POLLOUT 0x0004
+#define POLLERR 0x0008
+#define POLLHUP 0x0010
+#define POLLNVAL 0x0020
+
+/* The rest seem to be more-or-less nonstandard. Check them! */
+#define POLLRDNORM 0x0040
+#define POLLRDBAND 0x0080
+#define POLLWRNORM 0x0100
+#define POLLWRBAND 0x0200
+#define POLLMSG 0x0400
+#define POLLREMOVE 0x1000
+#define POLLRDHUP 0x2000
+
+struct pollfd {
+ int fd;
+ short events;
+ short revents;
+};
+
+#endif /* __ASM_AVR32_POLL_H */
diff --git a/include/asm-avr32/posix_types.h b/include/asm-avr32/posix_types.h
new file mode 100644
index 000000000000..2831b039b349
--- /dev/null
+++ b/include/asm-avr32/posix_types.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_POSIX_TYPES_H
+#define __ASM_AVR32_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc. Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long __kernel_ino_t;
+typedef unsigned short __kernel_mode_t;
+typedef unsigned short __kernel_nlink_t;
+typedef long __kernel_off_t;
+typedef int __kernel_pid_t;
+typedef unsigned short __kernel_ipc_pid_t;
+typedef unsigned int __kernel_uid_t;
+typedef unsigned int __kernel_gid_t;
+typedef unsigned long __kernel_size_t;
+typedef int __kernel_ssize_t;
+typedef int __kernel_ptrdiff_t;
+typedef long __kernel_time_t;
+typedef long __kernel_suseconds_t;
+typedef long __kernel_clock_t;
+typedef int __kernel_timer_t;
+typedef int __kernel_clockid_t;
+typedef int __kernel_daddr_t;
+typedef char * __kernel_caddr_t;
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+typedef unsigned int __kernel_uid32_t;
+typedef unsigned int __kernel_gid32_t;
+
+typedef unsigned short __kernel_old_uid_t;
+typedef unsigned short __kernel_old_gid_t;
+typedef unsigned short __kernel_old_dev_t;
+
+#ifdef __GNUC__
+typedef long long __kernel_loff_t;
+#endif
+
+typedef struct {
+#if defined(__KERNEL__) || defined(__USE_ALL)
+ int val[2];
+#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+ int __val[2];
+#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+} __kernel_fsid_t;
+
+#if defined(__KERNEL__)
+
+#undef __FD_SET
+static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+ unsigned long __tmp = __fd / __NFDBITS;
+ unsigned long __rem = __fd % __NFDBITS;
+ __fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
+}
+
+#undef __FD_CLR
+static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+ unsigned long __tmp = __fd / __NFDBITS;
+ unsigned long __rem = __fd % __NFDBITS;
+ __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
+}
+
+
+#undef __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
+{
+ unsigned long __tmp = __fd / __NFDBITS;
+ unsigned long __rem = __fd % __NFDBITS;
+ return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *__p)
+{
+ unsigned long *__tmp = __p->fds_bits;
+ int __i;
+
+ if (__builtin_constant_p(__FDSET_LONGS)) {
+ switch (__FDSET_LONGS) {
+ case 16:
+ __tmp[ 0] = 0; __tmp[ 1] = 0;
+ __tmp[ 2] = 0; __tmp[ 3] = 0;
+ __tmp[ 4] = 0; __tmp[ 5] = 0;
+ __tmp[ 6] = 0; __tmp[ 7] = 0;
+ __tmp[ 8] = 0; __tmp[ 9] = 0;
+ __tmp[10] = 0; __tmp[11] = 0;
+ __tmp[12] = 0; __tmp[13] = 0;
+ __tmp[14] = 0; __tmp[15] = 0;
+ return;
+
+ case 8:
+ __tmp[ 0] = 0; __tmp[ 1] = 0;
+ __tmp[ 2] = 0; __tmp[ 3] = 0;
+ __tmp[ 4] = 0; __tmp[ 5] = 0;
+ __tmp[ 6] = 0; __tmp[ 7] = 0;
+ return;
+
+ case 4:
+ __tmp[ 0] = 0; __tmp[ 1] = 0;
+ __tmp[ 2] = 0; __tmp[ 3] = 0;
+ return;
+ }
+ }
+ __i = __FDSET_LONGS;
+ while (__i) {
+ __i--;
+ *__tmp = 0;
+ __tmp++;
+ }
+}
+
+#endif /* defined(__KERNEL__) */
+
+#endif /* __ASM_AVR32_POSIX_TYPES_H */
diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h
new file mode 100644
index 000000000000..f6913778a45f
--- /dev/null
+++ b/include/asm-avr32/processor.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PROCESSOR_H
+#define __ASM_AVR32_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#define TASK_SIZE 0x80000000
+
+#ifndef __ASSEMBLY__
+
+static inline void *current_text_addr(void)
+{
+ register void *pc asm("pc");
+ return pc;
+}
+
+enum arch_type {
+ ARCH_AVR32A,
+ ARCH_AVR32B,
+ ARCH_MAX
+};
+
+enum cpu_type {
+ CPU_MORGAN,
+ CPU_AT32AP,
+ CPU_MAX
+};
+
+enum tlb_config {
+ TLB_NONE,
+ TLB_SPLIT,
+ TLB_UNIFIED,
+ TLB_INVALID
+};
+
+struct avr32_cpuinfo {
+ struct clk *clk;
+ unsigned long loops_per_jiffy;
+ enum arch_type arch_type;
+ enum cpu_type cpu_type;
+ unsigned short arch_revision;
+ unsigned short cpu_revision;
+ enum tlb_config tlb_config;
+
+ struct cache_info icache;
+ struct cache_info dcache;
+};
+
+extern struct avr32_cpuinfo boot_cpu_data;
+
+#ifdef CONFIG_SMP
+extern struct avr32_cpuinfo cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's
+ */
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+
+#define cpu_relax() barrier()
+#define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory")
+
+struct cpu_context {
+ unsigned long sr;
+ unsigned long pc;
+ unsigned long ksp; /* Kernel stack pointer */
+ unsigned long r7;
+ unsigned long r6;
+ unsigned long r5;
+ unsigned long r4;
+ unsigned long r3;
+ unsigned long r2;
+ unsigned long r1;
+ unsigned long r0;
+};
+
+/* This struct contains the CPU context as stored by switch_to() */
+struct thread_struct {
+ struct cpu_context cpu_context;
+ unsigned long single_step_addr;
+ u16 single_step_insn;
+};
+
+#define INIT_THREAD { \
+ .cpu_context = { \
+ .ksp = sizeof(init_stack) + (long)&init_stack, \
+ }, \
+}
+
+/*
+ * Set up *regs so a newly executed thread starts in user mode at new_pc.
+ */
+#define start_thread(regs, new_pc, new_sp) \
+ do { \
+ set_fs(USER_DS); \
+ memset((regs), 0, sizeof(*(regs))); \
+ (regs)->sr = MODE_USER; \
+ (regs)->pc = (new_pc) & ~1; /* clear bit 0 */ \
+ (regs)->sp = (new_sp); \
+ } while(0)
+
+struct task_struct;
+
+/* Free all resources held by a thread */
+extern void release_thread(struct task_struct *);
+
+/* Create a kernel thread without removing it from tasklists */
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+#define prepare_to_copy(tsk) do { } while(0)
+
+/* Return saved PC of a blocked thread */
+#define thread_saved_pc(tsk) ((tsk)->thread.cpu_context.pc)
+
+struct pt_regs;
+void show_trace(struct task_struct *task, unsigned long *stack,
+ struct pt_regs *regs);
+
+extern unsigned long get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk) ((tsk)->thread.cpu_context.pc)
+#define KSTK_ESP(tsk) ((tsk)->thread.cpu_context.ksp)
+
+#define ARCH_HAS_PREFETCH
+
+static inline void prefetch(const void *x)
+{
+ const char *c = x;
+ asm volatile("pref %0" : : "r"(c));
+}
+#define PREFETCH_STRIDE L1_CACHE_BYTES
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PROCESSOR_H */
diff --git a/include/asm-avr32/ptrace.h b/include/asm-avr32/ptrace.h
new file mode 100644
index 000000000000..60f0f19a81f1
--- /dev/null
+++ b/include/asm-avr32/ptrace.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PTRACE_H
+#define __ASM_AVR32_PTRACE_H
+
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+
+/*
+ * Status Register bits
+ */
+#define SR_H 0x40000000
+#define SR_R 0x20000000
+#define SR_J 0x10000000
+#define SR_DM 0x08000000
+#define SR_D 0x04000000
+#define MODE_NMI 0x01c00000
+#define MODE_EXCEPTION 0x01800000
+#define MODE_INT3 0x01400000
+#define MODE_INT2 0x01000000
+#define MODE_INT1 0x00c00000
+#define MODE_INT0 0x00800000
+#define MODE_SUPERVISOR 0x00400000
+#define MODE_USER 0x00000000
+#define MODE_MASK 0x01c00000
+#define SR_EM 0x00200000
+#define SR_I3M 0x00100000
+#define SR_I2M 0x00080000
+#define SR_I1M 0x00040000
+#define SR_I0M 0x00020000
+#define SR_GM 0x00010000
+
+#define SR_H_BIT 30
+#define SR_R_BIT 29
+#define SR_J_BIT 28
+#define SR_DM_BIT 27
+#define SR_D_BIT 26
+#define MODE_SHIFT 22
+#define SR_EM_BIT 21
+#define SR_I3M_BIT 20
+#define SR_I2M_BIT 19
+#define SR_I1M_BIT 18
+#define SR_I0M_BIT 17
+#define SR_GM_BIT 16
+
+/* The user-visible part */
+#define SR_L 0x00000020
+#define SR_Q 0x00000010
+#define SR_V 0x00000008
+#define SR_N 0x00000004
+#define SR_Z 0x00000002
+#define SR_C 0x00000001
+
+#define SR_L_BIT 5
+#define SR_Q_BIT 4
+#define SR_V_BIT 3
+#define SR_N_BIT 2
+#define SR_Z_BIT 1
+#define SR_C_BIT 0
+
+/*
+ * The order is defined by the stmts instruction. r0 is stored first,
+ * so it gets the highest address.
+ *
+ * Registers 0-12 are general-purpose registers (r12 is normally used for
+ * the function return value).
+ * Register 13 is the stack pointer
+ * Register 14 is the link register
+ * Register 15 is the program counter (retrieved from the RAR sysreg)
+ */
+#define FRAME_SIZE_FULL 72
+#define REG_R12_ORIG 68
+#define REG_R0 64
+#define REG_R1 60
+#define REG_R2 56
+#define REG_R3 52
+#define REG_R4 48
+#define REG_R5 44
+#define REG_R6 40
+#define REG_R7 36
+#define REG_R8 32
+#define REG_R9 28
+#define REG_R10 24
+#define REG_R11 20
+#define REG_R12 16
+#define REG_SP 12
+#define REG_LR 8
+
+#define FRAME_SIZE_MIN 8
+#define REG_PC 4
+#define REG_SR 0
+
+#ifndef __ASSEMBLY__
+struct pt_regs {
+ /* These are always saved */
+ unsigned long sr;
+ unsigned long pc;
+
+ /* These are sometimes saved */
+ unsigned long lr;
+ unsigned long sp;
+ unsigned long r12;
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long r7;
+ unsigned long r6;
+ unsigned long r5;
+ unsigned long r4;
+ unsigned long r3;
+ unsigned long r2;
+ unsigned long r1;
+ unsigned long r0;
+
+ /* Only saved on system call */
+ unsigned long r12_orig;
+};
+
+#ifdef __KERNEL__
+# define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER)
+extern void show_regs (struct pt_regs *);
+
+static __inline__ int valid_user_regs(struct pt_regs *regs)
+{
+ /*
+ * Valid only if none of the upper 16 bits of SR are set. Some of
+ * the Java bits might be acceptable if/when we implement support.
+ */
+ if (regs->sr & 0xffff0000) {
+ /*
+ * Upper bits were set: force the status register flags to
+ * be sane again and report this illegal behaviour...
+ */
+ regs->sr &= 0x0000ffff;
+ return 0;
+ }
+ return 1;
+}
+
+#define instruction_pointer(regs) ((regs)->pc)
+
+#define profile_pc(regs) instruction_pointer(regs)
+
+#endif /* __KERNEL__ */
+
+#endif /* ! __ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PTRACE_H */
diff --git a/include/asm-avr32/resource.h b/include/asm-avr32/resource.h
new file mode 100644
index 000000000000..c6dd101472b1
--- /dev/null
+++ b/include/asm-avr32/resource.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_RESOURCE_H
+#define __ASM_AVR32_RESOURCE_H
+
+#include <asm-generic/resource.h>
+
+#endif /* __ASM_AVR32_RESOURCE_H */
diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h
new file mode 100644
index 000000000000..bfe7d753423c
--- /dev/null
+++ b/include/asm-avr32/scatterlist.h
@@ -0,0 +1,21 @@
+#ifndef __ASM_AVR32_SCATTERLIST_H
+#define __ASM_AVR32_SCATTERLIST_H
+
+struct scatterlist {
+ struct page *page;
+ unsigned int offset;
+ dma_addr_t dma_address;
+ unsigned int length;
+};
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg) ((sg)->dma_address)
+#define sg_dma_len(sg) ((sg)->length)
+
+#define ISA_DMA_THRESHOLD (0xffffffff)
+
+#endif /* __ASM_AVR32_SCATTERLIST_H */
diff --git a/include/asm-avr32/sections.h b/include/asm-avr32/sections.h
new file mode 100644
index 000000000000..aa14252e4181
--- /dev/null
+++ b/include/asm-avr32/sections.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_SECTIONS_H
+#define __ASM_AVR32_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+#endif /* __ASM_AVR32_SECTIONS_H */
diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h
new file mode 100644
index 000000000000..ef99ddccc10c
--- /dev/null
+++ b/include/asm-avr32/semaphore.h
@@ -0,0 +1,109 @@
+/*
+ * SMP- and interrupt-safe semaphores.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Based on include/asm-i386/semaphore.h
+ * Copyright (C) 1996 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SEMAPHORE_H
+#define __ASM_AVR32_SEMAPHORE_H
+
+#include <linux/linkage.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
+
+struct semaphore {
+ atomic_t count;
+ int sleepers;
+ wait_queue_head_t wait;
+};
+
+#define __SEMAPHORE_INITIALIZER(name, n) \
+{ \
+ .count = ATOMIC_INIT(n), \
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
+}
+
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+ struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
+
+#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
+#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
+
+static inline void sema_init (struct semaphore *sem, int val)
+{
+ atomic_set(&sem->count, val);
+ sem->sleepers = 0;
+ init_waitqueue_head(&sem->wait);
+}
+
+static inline void init_MUTEX (struct semaphore *sem)
+{
+ sema_init(sem, 1);
+}
+
+static inline void init_MUTEX_LOCKED (struct semaphore *sem)
+{
+ sema_init(sem, 0);
+}
+
+void __down(struct semaphore * sem);
+int __down_interruptible(struct semaphore * sem);
+void __up(struct semaphore * sem);
+
+/*
+ * This is ugly, but we want the default case to fall through.
+ * "__down" is the C routine that actually waits; unlike i386 it is
+ * called directly. Compare arch/i386/kernel/semaphore.c
+ */
+static inline void down(struct semaphore * sem)
+{
+ might_sleep();
+ if (unlikely(atomic_dec_return (&sem->count) < 0))
+ __down (sem);
+}
+
+/*
+ * Try to acquire a semaphore, allowing the wait to be interrupted.
+ * Returns zero if we got it, or -EINTR if we were interrupted.
+ */
+static inline int down_interruptible(struct semaphore * sem)
+{
+ might_sleep();
+
+ /* Count went negative: contended, take the out-of-line slow path */
+ if (unlikely(atomic_dec_return (&sem->count) < 0))
+ return __down_interruptible (sem);
+ return 0;
+}
+
+/*
+ * Non-blockingly attempt to down() a semaphore.
+ * Returns zero if we acquired it
+ */
+static inline int down_trylock(struct semaphore * sem)
+{
+ return atomic_dec_if_positive(&sem->count) < 0;
+}
+
+/*
+ * Note! This is subtle. We jump to wake people up only if
+ * the semaphore was negative (== somebody was waiting on it).
+ * The default case (no contention) will result in NO
+ * jumps for both down() and up().
+ */
+static inline void up(struct semaphore * sem)
+{
+ if (unlikely(atomic_inc_return (&sem->count) <= 0))
+ __up (sem);
+}
+
+#endif /*__ASM_AVR32_SEMAPHORE_H */
diff --git a/include/asm-avr32/sembuf.h b/include/asm-avr32/sembuf.h
new file mode 100644
index 000000000000..e472216e0c97
--- /dev/null
+++ b/include/asm-avr32/sembuf.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_AVR32_SEMBUF_H
+#define __ASM_AVR32_SEMBUF_H
+
+/*
+ * The semid64_ds structure for AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+ struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
+ __kernel_time_t sem_otime; /* last semop time */
+ unsigned long __unused1;
+ __kernel_time_t sem_ctime; /* last change time */
+ unsigned long __unused2;
+ unsigned long sem_nsems; /* no. of semaphores in array */
+ unsigned long __unused3;
+ unsigned long __unused4;
+};
+
+#endif /* __ASM_AVR32_SEMBUF_H */
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h
new file mode 100644
index 000000000000..10193da4113b
--- /dev/null
+++ b/include/asm-avr32/setup.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/include/asm-arm/setup.h
+ * Copyright (C) 1997-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SETUP_H__
+#define __ASM_AVR32_SETUP_H__
+
+#define COMMAND_LINE_SIZE 256
+
+/* Magic number indicating that a tag table is present */
+#define ATAG_MAGIC 0xa2a25441
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Generic memory range, used by several tags.
+ *
+ * addr is always physical.
+ * size is measured in bytes.
+ * next is for use by the OS, e.g. for grouping regions into
+ * linked lists.
+ */
+struct tag_mem_range {
+ u32 addr;
+ u32 size;
+ struct tag_mem_range * next;
+};
+
+/* The list ends with an ATAG_NONE node. */
+#define ATAG_NONE 0x00000000
+
+struct tag_header {
+ u32 size;
+ u32 tag;
+};
+
+/* The list must start with an ATAG_CORE node */
+#define ATAG_CORE 0x54410001
+
+struct tag_core {
+ u32 flags;
+ u32 pagesize;
+ u32 rootdev;
+};
+
+/* it is allowed to have multiple ATAG_MEM nodes */
+#define ATAG_MEM 0x54410002
+/* ATAG_MEM uses tag_mem_range */
+
+/* command line: \0 terminated string */
+#define ATAG_CMDLINE 0x54410003
+
+struct tag_cmdline {
+ char cmdline[1]; /* this is the minimum size */
+};
+
+/* Ramdisk image (may be compressed) */
+#define ATAG_RDIMG 0x54410004
+/* ATAG_RDIMG uses tag_mem_range */
+
+/* Information about various clocks present in the system */
+#define ATAG_CLOCK 0x54410005
+
+struct tag_clock {
+ u32 clock_id; /* Which clock are we talking about? */
+ u32 clock_flags; /* Special features */
+ u64 clock_hz; /* Clock speed in Hz */
+};
+
+/* The clock types we know about */
+#define CLOCK_BOOTCPU 0
+
+/* Memory reserved for the system (e.g. the bootloader) */
+#define ATAG_RSVD_MEM 0x54410006
+/* ATAG_RSVD_MEM uses tag_mem_range */
+
+/* Ethernet information */
+
+#define ATAG_ETHERNET 0x54410007
+
+struct tag_ethernet {
+ u8 mac_index;
+ u8 mii_phy_addr;
+ u8 hw_address[6];
+};
+
+#define ETH_INVALID_PHY 0xff
+
+struct tag {
+ struct tag_header hdr;
+ union {
+ struct tag_core core;
+ struct tag_mem_range mem_range;
+ struct tag_cmdline cmdline;
+ struct tag_clock clock;
+ struct tag_ethernet ethernet;
+ } u;
+};
+
+struct tagtable {
+ u32 tag;
+ int (*parse)(struct tag *);
+};
+
+#define __tag __attribute_used__ __attribute__((__section__(".taglist")))
+#define __tagtable(tag, fn) \
+ static struct tagtable __tagtable_##fn __tag = { tag, fn }
+
+#define tag_member_present(tag,member) /* true if member ends within hdr.size words */ \
+ ((unsigned long)(&((struct tag *)0L)->member + 1) \
+ <= (tag)->hdr.size * 4)
+
+#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size)) /* hdr.size is in 32-bit words */
+#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2) /* bytes -> 32-bit words */
+
+#define for_each_tag(t,base) /* iterate until a tag with hdr.size == 0 */ \
+ for ((t) = (base); (t)->hdr.size; (t) = tag_next(t))
+
+extern struct tag_mem_range *mem_phys;
+extern struct tag_mem_range *mem_reserved;
+extern struct tag_mem_range *mem_ramdisk;
+
+extern struct tag *bootloader_tags;
+
+extern void setup_bootmem(void);
+extern void setup_processor(void);
+extern void board_setup_fbmem(unsigned long fbmem_start,
+ unsigned long fbmem_size);
+
+/* Chip-specific hook to enable the use of SDRAM */
+void chip_enable_sdram(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_SETUP_H__ */
diff --git a/include/asm-avr32/shmbuf.h b/include/asm-avr32/shmbuf.h
new file mode 100644
index 000000000000..c62fba41739a
--- /dev/null
+++ b/include/asm-avr32/shmbuf.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_AVR32_SHMBUF_H
+#define __ASM_AVR32_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for the AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+ struct ipc64_perm shm_perm; /* operation perms */
+ size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_time_t shm_atime; /* last attach time */
+ unsigned long __unused1;
+ __kernel_time_t shm_dtime; /* last detach time */
+ unsigned long __unused2;
+ __kernel_time_t shm_ctime; /* last change time */
+ unsigned long __unused3;
+ __kernel_pid_t shm_cpid; /* pid of creator */
+ __kernel_pid_t shm_lpid; /* pid of last operator */
+ unsigned long shm_nattch; /* no. of current attaches */
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+struct shminfo64 {
+ unsigned long shmmax;
+ unsigned long shmmin;
+ unsigned long shmmni;
+ unsigned long shmseg;
+ unsigned long shmall;
+ unsigned long __unused1;
+ unsigned long __unused2;
+ unsigned long __unused3;
+ unsigned long __unused4;
+};
+
+#endif /* __ASM_AVR32_SHMBUF_H */
diff --git a/include/asm-avr32/shmparam.h b/include/asm-avr32/shmparam.h
new file mode 100644
index 000000000000..3681266c77f7
--- /dev/null
+++ b/include/asm-avr32/shmparam.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_SHMPARAM_H
+#define __ASM_AVR32_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
+
+#endif /* __ASM_AVR32_SHMPARAM_H */
diff --git a/include/asm-avr32/sigcontext.h b/include/asm-avr32/sigcontext.h
new file mode 100644
index 000000000000..e04062b5f39f
--- /dev/null
+++ b/include/asm-avr32/sigcontext.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SIGCONTEXT_H
+#define __ASM_AVR32_SIGCONTEXT_H
+
+struct sigcontext {
+ unsigned long oldmask;
+
+ /* CPU registers */
+ unsigned long sr;
+ unsigned long pc;
+ unsigned long lr;
+ unsigned long sp;
+ unsigned long r12;
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long r7;
+ unsigned long r6;
+ unsigned long r5;
+ unsigned long r4;
+ unsigned long r3;
+ unsigned long r2;
+ unsigned long r1;
+ unsigned long r0;
+};
+
+#endif /* __ASM_AVR32_SIGCONTEXT_H */
diff --git a/include/asm-avr32/siginfo.h b/include/asm-avr32/siginfo.h
new file mode 100644
index 000000000000..5ee93f40a8a8
--- /dev/null
+++ b/include/asm-avr32/siginfo.h
@@ -0,0 +1,6 @@
+#ifndef _AVR32_SIGINFO_H
+#define _AVR32_SIGINFO_H
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/include/asm-avr32/signal.h b/include/asm-avr32/signal.h
new file mode 100644
index 000000000000..caffefeeba1f
--- /dev/null
+++ b/include/asm-avr32/signal.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SIGNAL_H
+#define __ASM_AVR32_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems. */
+struct siginfo;
+
+#ifdef __KERNEL__
+/* Most things should be clean enough to redefine this at will, if care
+ is taken to make libc match. */
+
+#define _NSIG 64
+#define _NSIG_BPW 32
+#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
+
+typedef unsigned long old_sigset_t; /* at least 32 bits */
+
+typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#else
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+#define NSIG 32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+#define SIGBUS 7
+#define SIGFPE 8
+#define SIGKILL 9
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGSTKFLT 16
+#define SIGCHLD 17
+#define SIGCONT 18
+#define SIGSTOP 19
+#define SIGTSTP 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGURG 23
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGIO 29
+#define SIGPOLL SIGIO
+/*
+#define SIGLOST 29
+*/
+#define SIGPWR 30
+#define SIGSYS 31
+#define SIGUNUSED 31
+
+/* These should not be considered constants from userland. */
+#define SIGRTMIN 32
+#define SIGRTMAX (_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_SIGINFO deliver the signal with SIGINFO structs
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP 0x00000001
+#define SA_NOCLDWAIT 0x00000002
+#define SA_SIGINFO 0x00000004
+#define SA_RESTORER 0x04000000
+#define SA_ONSTACK 0x08000000
+#define SA_RESTART 0x10000000
+#define SA_NODEFER 0x40000000
+#define SA_RESETHAND 0x80000000
+
+#define SA_NOMASK SA_NODEFER
+#define SA_ONESHOT SA_RESETHAND
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK 1
+#define SS_DISABLE 2
+
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+
+#include <asm-generic/signal.h>
+
+#ifdef __KERNEL__
+struct old_sigaction {
+ __sighandler_t sa_handler;
+ old_sigset_t sa_mask;
+ unsigned long sa_flags;
+ __sigrestore_t sa_restorer;
+};
+
+struct sigaction {
+ __sighandler_t sa_handler;
+ unsigned long sa_flags;
+ __sigrestore_t sa_restorer;
+ sigset_t sa_mask; /* mask last for extensibility */
+};
+
+struct k_sigaction {
+ struct sigaction sa;
+};
+#else
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+struct sigaction {
+ union {
+ __sighandler_t _sa_handler;
+ void (*_sa_sigaction)(int, struct siginfo *, void *);
+ } _u;
+ sigset_t sa_mask;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+};
+
+#define sa_handler _u._sa_handler
+#define sa_sigaction _u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+ void __user *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+#include <asm/sigcontext.h>
+#undef __HAVE_ARCH_SIG_BITOPS
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h
new file mode 100644
index 000000000000..543229de8173
--- /dev/null
+++ b/include/asm-avr32/socket.h
@@ -0,0 +1,53 @@
+#ifndef __ASM_AVR32_SOCKET_H
+#define __ASM_AVR32_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET 1
+
+#define SO_DEBUG 1
+#define SO_REUSEADDR 2
+#define SO_TYPE 3
+#define SO_ERROR 4
+#define SO_DONTROUTE 5
+#define SO_BROADCAST 6
+#define SO_SNDBUF 7
+#define SO_RCVBUF 8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
+#define SO_KEEPALIVE 9
+#define SO_OOBINLINE 10
+#define SO_NO_CHECK 11
+#define SO_PRIORITY 12
+#define SO_LINGER 13
+#define SO_BSDCOMPAT 14
+/* To add :#define SO_REUSEPORT 15 */
+#define SO_PASSCRED 16
+#define SO_PEERCRED 17
+#define SO_RCVLOWAT 18
+#define SO_SNDLOWAT 19
+#define SO_RCVTIMEO 20
+#define SO_SNDTIMEO 21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION 22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
+#define SO_SECURITY_ENCRYPTION_NETWORK 24
+
+#define SO_BINDTODEVICE 25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER 26
+#define SO_DETACH_FILTER 27
+
+#define SO_PEERNAME 28
+#define SO_TIMESTAMP 29
+#define SCM_TIMESTAMP SO_TIMESTAMP
+
+#define SO_ACCEPTCONN 30
+
+#define SO_PEERSEC 31
+#define SO_PASSSEC 34
+
+#endif /* __ASM_AVR32_SOCKET_H */
diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h
new file mode 100644
index 000000000000..84f3d65b3b3b
--- /dev/null
+++ b/include/asm-avr32/sockios.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_AVR32_SOCKIOS_H
+#define __ASM_AVR32_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN 0x8901
+#define SIOCSPGRP 0x8902
+#define FIOGETOWN 0x8903
+#define SIOCGPGRP 0x8904
+#define SIOCATMARK 0x8905
+#define SIOCGSTAMP 0x8906 /* Get stamp */
+
+#endif /* __ASM_AVR32_SOCKIOS_H */
diff --git a/include/asm-avr32/stat.h b/include/asm-avr32/stat.h
new file mode 100644
index 000000000000..e72881e10230
--- /dev/null
+++ b/include/asm-avr32/stat.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_STAT_H
+#define __ASM_AVR32_STAT_H
+
+struct __old_kernel_stat {
+ unsigned short st_dev;
+ unsigned short st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ unsigned long st_size;
+ unsigned long st_atime;
+ unsigned long st_mtime;
+ unsigned long st_ctime;
+};
+
+struct stat {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned long st_rdev;
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct stat64 {
+ unsigned long long st_dev;
+
+ unsigned long long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+
+ unsigned long st_uid;
+ unsigned long st_gid;
+
+ unsigned long long st_rdev;
+
+ long long st_size;
+ unsigned long __pad1; /* align 64-bit st_blocks */
+ unsigned long st_blksize;
+
+ unsigned long long st_blocks; /* Number 512-byte blocks allocated. */
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+
+ unsigned long __unused1;
+ unsigned long __unused2;
+};
+
+#endif /* __ASM_AVR32_STAT_H */
diff --git a/include/asm-avr32/statfs.h b/include/asm-avr32/statfs.h
new file mode 100644
index 000000000000..2961bd18c50e
--- /dev/null
+++ b/include/asm-avr32/statfs.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_STATFS_H
+#define __ASM_AVR32_STATFS_H
+
+#include <asm-generic/statfs.h>
+
+#endif /* __ASM_AVR32_STATFS_H */
diff --git a/include/asm-avr32/string.h b/include/asm-avr32/string.h
new file mode 100644
index 000000000000..c91a623cd585
--- /dev/null
+++ b/include/asm-avr32/string.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_STRING_H
+#define __ASM_AVR32_STRING_H
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *b, int c, size_t len);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *to, const void *from, size_t len);
+
+#endif /* __ASM_AVR32_STRING_H */
diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h
new file mode 100644
index 000000000000..f91975f330f6
--- /dev/null
+++ b/include/asm-avr32/sysreg.h
@@ -0,0 +1,332 @@
+/*
+ * AVR32 System Registers
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SYSREG_H__
+#define __ASM_AVR32_SYSREG_H__
+
+/* sysreg register offsets */
+#define SYSREG_SR 0x0000
+#define SYSREG_EVBA 0x0004
+#define SYSREG_ACBA 0x0008
+#define SYSREG_CPUCR 0x000c
+#define SYSREG_ECR 0x0010
+#define SYSREG_RSR_SUP 0x0014
+#define SYSREG_RSR_INT0 0x0018
+#define SYSREG_RSR_INT1 0x001c
+#define SYSREG_RSR_INT2 0x0020
+#define SYSREG_RSR_INT3 0x0024
+#define SYSREG_RSR_EX 0x0028
+#define SYSREG_RSR_NMI 0x002c
+#define SYSREG_RSR_DBG 0x0030
+#define SYSREG_RAR_SUP 0x0034
+#define SYSREG_RAR_INT0 0x0038
+#define SYSREG_RAR_INT1 0x003c
+#define SYSREG_RAR_INT2 0x0040
+#define SYSREG_RAR_INT3 0x0044
+#define SYSREG_RAR_EX 0x0048
+#define SYSREG_RAR_NMI 0x004c
+#define SYSREG_RAR_DBG 0x0050
+#define SYSREG_JECR 0x0054
+#define SYSREG_JOSP 0x0058
+#define SYSREG_JAVA_LV0 0x005c
+#define SYSREG_JAVA_LV1 0x0060
+#define SYSREG_JAVA_LV2 0x0064
+#define SYSREG_JAVA_LV3 0x0068
+#define SYSREG_JAVA_LV4 0x006c
+#define SYSREG_JAVA_LV5 0x0070
+#define SYSREG_JAVA_LV6 0x0074
+#define SYSREG_JAVA_LV7 0x0078
+#define SYSREG_JTBA 0x007c
+#define SYSREG_JBCR 0x0080
+#define SYSREG_CONFIG0 0x0100
+#define SYSREG_CONFIG1 0x0104
+#define SYSREG_COUNT 0x0108
+#define SYSREG_COMPARE 0x010c
+#define SYSREG_TLBEHI 0x0110
+#define SYSREG_TLBELO 0x0114
+#define SYSREG_PTBR 0x0118
+#define SYSREG_TLBEAR 0x011c
+#define SYSREG_MMUCR 0x0120
+#define SYSREG_TLBARLO 0x0124
+#define SYSREG_TLBARHI 0x0128
+#define SYSREG_PCCNT 0x012c
+#define SYSREG_PCNT0 0x0130
+#define SYSREG_PCNT1 0x0134
+#define SYSREG_PCCR 0x0138
+#define SYSREG_BEAR 0x013c
+
+/* Bitfields in SR */
+#define SYSREG_SR_C_OFFSET 0
+#define SYSREG_SR_C_SIZE 1
+#define SYSREG_Z_OFFSET 1
+#define SYSREG_Z_SIZE 1
+#define SYSREG_SR_N_OFFSET 2
+#define SYSREG_SR_N_SIZE 1
+#define SYSREG_SR_V_OFFSET 3
+#define SYSREG_SR_V_SIZE 1
+#define SYSREG_Q_OFFSET 4
+#define SYSREG_Q_SIZE 1
+#define SYSREG_GM_OFFSET 16
+#define SYSREG_GM_SIZE 1
+#define SYSREG_I0M_OFFSET 17
+#define SYSREG_I0M_SIZE 1
+#define SYSREG_I1M_OFFSET 18
+#define SYSREG_I1M_SIZE 1
+#define SYSREG_I2M_OFFSET 19
+#define SYSREG_I2M_SIZE 1
+#define SYSREG_I3M_OFFSET 20
+#define SYSREG_I3M_SIZE 1
+#define SYSREG_EM_OFFSET 21
+#define SYSREG_EM_SIZE 1
+#define SYSREG_M0_OFFSET 22
+#define SYSREG_M0_SIZE 1
+#define SYSREG_M1_OFFSET 23
+#define SYSREG_M1_SIZE 1
+#define SYSREG_M2_OFFSET 24
+#define SYSREG_M2_SIZE 1
+#define SYSREG_SR_D_OFFSET 26
+#define SYSREG_SR_D_SIZE 1
+#define SYSREG_DM_OFFSET 27
+#define SYSREG_DM_SIZE 1
+#define SYSREG_SR_J_OFFSET 28
+#define SYSREG_SR_J_SIZE 1
+#define SYSREG_R_OFFSET 29
+#define SYSREG_R_SIZE 1
+#define SYSREG_H_OFFSET 30
+#define SYSREG_H_SIZE 1
+
+/* Bitfields in EVBA */
+
+/* Bitfields in ACBA */
+
+/* Bitfields in CPUCR */
+#define SYSREG_BI_OFFSET 0
+#define SYSREG_BI_SIZE 1
+#define SYSREG_BE_OFFSET 1
+#define SYSREG_BE_SIZE 1
+#define SYSREG_FE_OFFSET 2
+#define SYSREG_FE_SIZE 1
+#define SYSREG_RE_OFFSET 3
+#define SYSREG_RE_SIZE 1
+#define SYSREG_IBE_OFFSET 4
+#define SYSREG_IBE_SIZE 1
+#define SYSREG_IEE_OFFSET 5
+#define SYSREG_IEE_SIZE 1
+
+/* Bitfields in ECR */
+#define SYSREG_ECR_OFFSET 0
+#define SYSREG_ECR_SIZE 32
+
+/* Bitfields in RSR_SUP */
+
+/* Bitfields in RSR_INT0 */
+
+/* Bitfields in RSR_INT1 */
+
+/* Bitfields in RSR_INT2 */
+
+/* Bitfields in RSR_INT3 */
+
+/* Bitfields in RSR_EX */
+
+/* Bitfields in RSR_NMI */
+
+/* Bitfields in RSR_DBG */
+
+/* Bitfields in RAR_SUP */
+
+/* Bitfields in RAR_INT0 */
+
+/* Bitfields in RAR_INT1 */
+
+/* Bitfields in RAR_INT2 */
+
+/* Bitfields in RAR_INT3 */
+
+/* Bitfields in RAR_EX */
+
+/* Bitfields in RAR_NMI */
+
+/* Bitfields in RAR_DBG */
+
+/* Bitfields in JECR */
+
+/* Bitfields in JOSP */
+
+/* Bitfields in JAVA_LV0 */
+
+/* Bitfields in JAVA_LV1 */
+
+/* Bitfields in JAVA_LV2 */
+
+/* Bitfields in JAVA_LV3 */
+
+/* Bitfields in JAVA_LV4 */
+
+/* Bitfields in JAVA_LV5 */
+
+/* Bitfields in JAVA_LV6 */
+
+/* Bitfields in JAVA_LV7 */
+
+/* Bitfields in JTBA */
+
+/* Bitfields in JBCR */
+
+/* Bitfields in CONFIG0 */
+#define SYSREG_CONFIG0_D_OFFSET 1
+#define SYSREG_CONFIG0_D_SIZE 1
+#define SYSREG_CONFIG0_S_OFFSET 2
+#define SYSREG_CONFIG0_S_SIZE 1
+#define SYSREG_O_OFFSET 3
+#define SYSREG_O_SIZE 1
+#define SYSREG_P_OFFSET 4
+#define SYSREG_P_SIZE 1
+#define SYSREG_CONFIG0_J_OFFSET 5
+#define SYSREG_CONFIG0_J_SIZE 1
+#define SYSREG_F_OFFSET 6
+#define SYSREG_F_SIZE 1
+#define SYSREG_MMUT_OFFSET 7
+#define SYSREG_MMUT_SIZE 3
+#define SYSREG_AR_OFFSET 10
+#define SYSREG_AR_SIZE 3
+#define SYSREG_AT_OFFSET 13
+#define SYSREG_AT_SIZE 3
+#define SYSREG_PROCESSORREVISION_OFFSET 16
+#define SYSREG_PROCESSORREVISION_SIZE 8
+#define SYSREG_PROCESSORID_OFFSET 24
+#define SYSREG_PROCESSORID_SIZE 8
+
+/* Bitfields in CONFIG1 (contiguous: each offset is previous offset + size) */
+#define SYSREG_DASS_OFFSET 0
+#define SYSREG_DASS_SIZE 3
+#define SYSREG_DLSZ_OFFSET 3
+#define SYSREG_DLSZ_SIZE 3
+#define SYSREG_DSET_OFFSET 6
+#define SYSREG_DSET_SIZE 4
+#define SYSREG_IASS_OFFSET 10
+#define SYSREG_IASS_SIZE 3
+#define SYSREG_ILSZ_OFFSET 13
+#define SYSREG_ILSZ_SIZE 3
+#define SYSREG_ISET_OFFSET 16
+#define SYSREG_ISET_SIZE 4
+#define SYSREG_DMMUSZ_OFFSET 20
+#define SYSREG_DMMUSZ_SIZE 6
+#define SYSREG_IMMUSZ_OFFSET 26
+#define SYSREG_IMMUSZ_SIZE 6
+
+/* Bitfields in COUNT */
+
+/* Bitfields in COMPARE */
+
+/* Bitfields in TLBEHI */
+#define SYSREG_ASID_OFFSET 0
+#define SYSREG_ASID_SIZE 8
+#define SYSREG_TLBEHI_I_OFFSET 8
+#define SYSREG_TLBEHI_I_SIZE 1
+#define SYSREG_TLBEHI_V_OFFSET 9
+#define SYSREG_TLBEHI_V_SIZE 1
+#define SYSREG_VPN_OFFSET 10
+#define SYSREG_VPN_SIZE 22
+
+/* Bitfields in TLBELO */
+#define SYSREG_W_OFFSET 0
+#define SYSREG_W_SIZE 1
+#define SYSREG_TLBELO_D_OFFSET 1
+#define SYSREG_TLBELO_D_SIZE 1
+#define SYSREG_SZ_OFFSET 2
+#define SYSREG_SZ_SIZE 2
+#define SYSREG_AP_OFFSET 4
+#define SYSREG_AP_SIZE 3
+#define SYSREG_B_OFFSET 7
+#define SYSREG_B_SIZE 1
+#define SYSREG_G_OFFSET 8
+#define SYSREG_G_SIZE 1
+#define SYSREG_TLBELO_C_OFFSET 9
+#define SYSREG_TLBELO_C_SIZE 1
+#define SYSREG_PFN_OFFSET 10
+#define SYSREG_PFN_SIZE 22
+
+/* Bitfields in PTBR */
+
+/* Bitfields in TLBEAR */
+
+/* Bitfields in MMUCR */
+#define SYSREG_E_OFFSET 0
+#define SYSREG_E_SIZE 1
+#define SYSREG_M_OFFSET 1
+#define SYSREG_M_SIZE 1
+#define SYSREG_MMUCR_I_OFFSET 2
+#define SYSREG_MMUCR_I_SIZE 1
+#define SYSREG_MMUCR_N_OFFSET 3
+#define SYSREG_MMUCR_N_SIZE 1
+#define SYSREG_MMUCR_S_OFFSET 4
+#define SYSREG_MMUCR_S_SIZE 1
+#define SYSREG_DLA_OFFSET 8
+#define SYSREG_DLA_SIZE 6
+#define SYSREG_DRP_OFFSET 14
+#define SYSREG_DRP_SIZE 6
+#define SYSREG_ILA_OFFSET 20
+#define SYSREG_ILA_SIZE 6
+#define SYSREG_IRP_OFFSET 26
+#define SYSREG_IRP_SIZE 6
+
+/* Bitfields in TLBARLO */
+
+/* Bitfields in TLBARHI */
+
+/* Bitfields in PCCNT */
+
+/* Bitfields in PCNT0 */
+
+/* Bitfields in PCNT1 */
+
+/* Bitfields in PCCR */
+
+/* Bitfields in BEAR */
+
+/* Constants for ECR */
+#define ECR_UNRECOVERABLE 0
+#define ECR_TLB_MULTIPLE 1
+#define ECR_BUS_ERROR_WRITE 2
+#define ECR_BUS_ERROR_READ 3
+#define ECR_NMI 4
+#define ECR_ADDR_ALIGN_X 5
+#define ECR_PROTECTION_X 6
+#define ECR_DEBUG 7
+#define ECR_ILLEGAL_OPCODE 8
+#define ECR_UNIMPL_INSTRUCTION 9
+#define ECR_PRIVILEGE_VIOLATION 10
+#define ECR_FPE 11
+#define ECR_COPROC_ABSENT 12
+#define ECR_ADDR_ALIGN_R 13
+#define ECR_ADDR_ALIGN_W 14
+#define ECR_PROTECTION_R 15
+#define ECR_PROTECTION_W 16
+#define ECR_DTLB_MODIFIED 17
+#define ECR_TLB_MISS_X 20
+#define ECR_TLB_MISS_R 24
+#define ECR_TLB_MISS_W 28
+
+/* Bit manipulation macros. NOTE(review): (1 << SIZE) overflows int for a 32-bit field such as ECR. */
+#define SYSREG_BIT(name) (1 << SYSREG_##name##_OFFSET)
+#define SYSREG_BF(name,value) (((value) & ((1 << SYSREG_##name##_SIZE) - 1)) << SYSREG_##name##_OFFSET)
+#define SYSREG_BFEXT(name,value) (((value) >> SYSREG_##name##_OFFSET) & ((1 << SYSREG_##name##_SIZE) - 1))
+#define SYSREG_BFINS(name,value,old) (((old) & ~(((1 << SYSREG_##name##_SIZE) - 1) << SYSREG_##name##_OFFSET)) | SYSREG_BF(name,value))
+
+#ifdef __CHECKER__
+extern unsigned long __builtin_mfsr(unsigned long reg);
+extern void __builtin_mtsr(unsigned long reg, unsigned long value);
+#endif
+
+/* Register access macros */
+#define sysreg_read(reg) __builtin_mfsr(SYSREG_##reg)
+#define sysreg_write(reg, value) __builtin_mtsr(SYSREG_##reg, value)
+
+#endif /* __ASM_AVR32_SYSREG_H__ */
diff --git a/include/asm-avr32/system.h b/include/asm-avr32/system.h
new file mode 100644
index 000000000000..ac596058697d
--- /dev/null
+++ b/include/asm-avr32/system.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SYSTEM_H
+#define __ASM_AVR32_SYSTEM_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+
+#define xchg(ptr,x) \
+ ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+
+#define nop() asm volatile("nop")
+
+#define mb() asm volatile("" : : : "memory") /* empty asm: compiler barrier only */
+#define rmb() mb()
+#define wmb() asm volatile("sync 0" : : : "memory") /* issues the "sync 0" instruction */
+#define read_barrier_depends() do { } while(0)
+#define set_mb(var, value) do { var = value; mb(); } while(0)
+
+/*
+ * Help PathFinder and other Nexus-compliant debuggers keep track of
+ * the current PID by emitting an Ownership Trace Message each time we
+ * switch task.
+ */
+#ifdef CONFIG_OWNERSHIP_TRACE
+#include <asm/ocd.h>
+#define finish_arch_switch(prev) \
+ do { \
+ __mtdr(DBGREG_PID, prev->pid); \
+ __mtdr(DBGREG_PID, current->pid); \
+ } while(0)
+#endif
+
+/*
+ * switch_to(prev, next, last) should switch from task `prev' to task
+ * `next'. `prev' will never be the same as `next'.
+ *
+ * We just delegate everything to the __switch_to assembly function,
+ * which is implemented in arch/avr32/kernel/switch_to.S
+ *
+ * mb() tells GCC not to cache `current' across this call.
+ */
+struct cpu_context;
+struct task_struct;
+extern struct task_struct *__switch_to(struct task_struct *,
+ struct cpu_context *,
+ struct cpu_context *);
+#define switch_to(prev, next, last) \
+ do { \
+ last = __switch_to(prev, &prev->thread.cpu_context + 1, \
+ &next->thread.cpu_context); \
+ } while (0)
+
+#ifdef CONFIG_SMP
+# error "The AVR32 port does not support SMP"
+#else
+# define smp_mb() barrier()
+# define smp_rmb() barrier()
+# define smp_wmb() barrier()
+# define smp_read_barrier_depends() do { } while(0)
+#endif
+
+#include <linux/irqflags.h>
+
+extern void __xchg_called_with_bad_pointer(void);
+
+#ifdef __CHECKER__
+extern unsigned long __builtin_xchg(void *ptr, unsigned long x);
+#endif
+
+#define xchg_u32(val, m) __builtin_xchg((void *)(m), (val)) /* args parenthesized so expressions expand safely */
+
+static inline unsigned long __xchg(unsigned long x,
+ volatile void *ptr,
+ int size)
+{
+ /* Only 4-byte exchanges are implemented. */
+ if (size == 4)
+ return xchg_u32(x, ptr);
+
+ /* Any other size: call the bad-pointer hook, return x unchanged. */
+ __xchg_called_with_bad_pointer();
+ return x;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+ unsigned long new)
+{
+ __u32 ret; /* value observed in *m; equals old iff the swap happened */
+
+ asm volatile(
+ "1: ssrf 5\n" /* set SR bit 5 before the load (presumably the lock flag stcond tests) */
+ " ld.w %[ret], %[m]\n" /* ret = *m */
+ " cp.w %[ret], %[old]\n"
+ " brne 2f\n" /* mismatch: leave *m untouched */
+ " stcond %[m], %[new]\n" /* conditional store of new */
+ " brne 1b\n" /* store did not go through: retry */
+ "2:\n"
+ : [ret] "=&r"(ret), [m] "=m"(*m)
+ : "m"(*m), [old] "ir"(old), [new] "r"(new) /* input is the word itself, not the pointer variable */
+ : "memory", "cc");
+ return ret;
+}
+
+extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
+ volatile int * m, unsigned long old, unsigned long new);
+#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels
+
+/* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ /* Dispatch on operand width in bytes. */
+ if (size == 4)
+ return __cmpxchg_u32(ptr, old, new);
+ if (size == 8)
+ return __cmpxchg_u64(ptr, old, new);
+
+ /* No handler for this size: call the bad-pointer hook. */
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
+#define cmpxchg(ptr, old, new) \
+ ((typeof(*(ptr)))__cmpxchg((ptr), (unsigned long)(old), \
+ (unsigned long)(new), \
+ sizeof(*(ptr))))
+
+struct pt_regs;
+extern void __die(const char *, struct pt_regs *, unsigned long,
+ const char *, const char *, unsigned long);
+extern void __die_if_kernel(const char *, struct pt_regs *, unsigned long,
+ const char *, const char *, unsigned long);
+
+#define die(msg, regs, err) \
+ __die(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__)
+#define die_if_kernel(msg, regs, err) \
+ __die_if_kernel(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__)
+
+#define arch_align_stack(x) (x)
+
+#endif /* __ASM_AVR32_SYSTEM_H */
diff --git a/include/asm-avr32/termbits.h b/include/asm-avr32/termbits.h
new file mode 100644
index 000000000000..9dc6eacafa33
--- /dev/null
+++ b/include/asm-avr32/termbits.h
@@ -0,0 +1,173 @@
+#ifndef __ASM_AVR32_TERMBITS_H
+#define __ASM_AVR32_TERMBITS_H
+
+#include <linux/posix_types.h>
+
+typedef unsigned char cc_t;
+typedef unsigned int speed_t;
+typedef unsigned int tcflag_t;
+
+#define NCCS 19
+struct termios {
+ tcflag_t c_iflag; /* input mode flags */
+ tcflag_t c_oflag; /* output mode flags */
+ tcflag_t c_cflag; /* control mode flags */
+ tcflag_t c_lflag; /* local mode flags */
+ cc_t c_line; /* line discipline */
+ cc_t c_cc[NCCS]; /* control characters */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK 0000001
+#define BRKINT 0000002
+#define IGNPAR 0000004
+#define PARMRK 0000010
+#define INPCK 0000020
+#define ISTRIP 0000040
+#define INLCR 0000100
+#define IGNCR 0000200
+#define ICRNL 0000400
+#define IUCLC 0001000
+#define IXON 0002000
+#define IXANY 0004000
+#define IXOFF 0010000
+#define IMAXBEL 0020000
+#define IUTF8 0040000
+
+/* c_oflag bits */
+#define OPOST 0000001
+#define OLCUC 0000002
+#define ONLCR 0000004
+#define OCRNL 0000010
+#define ONOCR 0000020
+#define ONLRET 0000040
+#define OFILL 0000100
+#define OFDEL 0000200
+#define NLDLY 0000400
+#define NL0 0000000
+#define NL1 0000400
+#define CRDLY 0003000
+#define CR0 0000000
+#define CR1 0001000
+#define CR2 0002000
+#define CR3 0003000
+#define TABDLY 0014000
+#define TAB0 0000000
+#define TAB1 0004000
+#define TAB2 0010000
+#define TAB3 0014000
+#define XTABS 0014000
+#define BSDLY 0020000
+#define BS0 0000000
+#define BS1 0020000
+#define VTDLY 0040000
+#define VT0 0000000
+#define VT1 0040000
+#define FFDLY 0100000
+#define FF0 0000000
+#define FF1 0100000
+
+/* c_cflag bit meaning */
+#define CBAUD 0010017
+#define B0 0000000 /* hang up */
+#define B50 0000001
+#define B75 0000002
+#define B110 0000003
+#define B134 0000004
+#define B150 0000005
+#define B200 0000006
+#define B300 0000007
+#define B600 0000010
+#define B1200 0000011
+#define B1800 0000012
+#define B2400 0000013
+#define B4800 0000014
+#define B9600 0000015
+#define B19200 0000016
+#define B38400 0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE 0000060
+#define CS5 0000000
+#define CS6 0000020
+#define CS7 0000040
+#define CS8 0000060
+#define CSTOPB 0000100
+#define CREAD 0000200
+#define PARENB 0000400
+#define PARODD 0001000
+#define HUPCL 0002000
+#define CLOCAL 0004000
+#define CBAUDEX 0010000
+#define B57600 0010001
+#define B115200 0010002
+#define B230400 0010003
+#define B460800 0010004
+#define B500000 0010005
+#define B576000 0010006
+#define B921600 0010007
+#define B1000000 0010010
+#define B1152000 0010011
+#define B1500000 0010012
+#define B2000000 0010013
+#define B2500000 0010014
+#define B3000000 0010015
+#define B3500000 0010016
+#define B4000000 0010017
+#define CIBAUD 002003600000 /* input baud rate (not used) */
+#define CMSPAR 010000000000 /* mark or space (stick) parity */
+#define CRTSCTS 020000000000 /* flow control */
+
+/* c_lflag bits */
+#define ISIG 0000001
+#define ICANON 0000002
+#define XCASE 0000004
+#define ECHO 0000010
+#define ECHOE 0000020
+#define ECHOK 0000040
+#define ECHONL 0000100
+#define NOFLSH 0000200
+#define TOSTOP 0000400
+#define ECHOCTL 0001000
+#define ECHOPRT 0002000
+#define ECHOKE 0004000
+#define FLUSHO 0010000
+#define PENDIN 0040000
+#define IEXTEN 0100000
+
+/* tcflow() and TCXONC use these */
+#define TCOOFF 0
+#define TCOON 1
+#define TCIOFF 2
+#define TCION 3
+
+/* tcflush() and TCFLSH use these */
+#define TCIFLUSH 0
+#define TCOFLUSH 1
+#define TCIOFLUSH 2
+
+/* tcsetattr uses these */
+#define TCSANOW 0
+#define TCSADRAIN 1
+#define TCSAFLUSH 2
+
+#endif /* __ASM_AVR32_TERMBITS_H */
diff --git a/include/asm-avr32/termios.h b/include/asm-avr32/termios.h
new file mode 100644
index 000000000000..615bc0639e5c
--- /dev/null
+++ b/include/asm-avr32/termios.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TERMIOS_H
+#define __ASM_AVR32_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+ unsigned short ws_row;
+ unsigned short ws_col;
+ unsigned short ws_xpixel;
+ unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+ unsigned short c_iflag; /* input mode flags */
+ unsigned short c_oflag; /* output mode flags */
+ unsigned short c_cflag; /* control mode flags */
+ unsigned short c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[NCC]; /* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE 0x001
+#define TIOCM_DTR 0x002
+#define TIOCM_RTS 0x004
+#define TIOCM_ST 0x008
+#define TIOCM_SR 0x010
+#define TIOCM_CTS 0x020
+#define TIOCM_CAR 0x040
+#define TIOCM_RNG 0x080
+#define TIOCM_DSR 0x100
+#define TIOCM_CD TIOCM_CAR
+#define TIOCM_RI TIOCM_RNG
+#define TIOCM_OUT1 0x2000
+#define TIOCM_OUT2 0x4000
+#define TIOCM_LOOP 0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+/* line disciplines */
+#define N_TTY 0
+#define N_SLIP 1
+#define N_MOUSE 2
+#define N_PPP 3
+#define N_STRIP 4
+#define N_AX25 5
+#define N_X25 6 /* X.25 async */
+#define N_6PACK 7
+#define N_MASC 8 /* Reserved for Mobitex module <kaz@cafe.net> */
+#define N_R3964 9 /* Reserved for Simatic R3964 module */
+#define N_PROFIBUS_FDL 10 /* Reserved for Profibus <Dave@mvhi.com> */
+#define N_IRDA 11 /* Linux IR - http://irda.sourceforge.net/ */
+#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */
+#define N_HDLC 13 /* synchronous HDLC */
+#define N_SYNC_PPP 14 /* synchronous PPP */
+#define N_HCI 15 /* Bluetooth HCI UART */
+
+#ifdef __KERNEL__
+/* intr=^C quit=^\ erase=del kill=^U
+ eof=^D vtime=\0 vmin=\1 sxtc=\0
+ start=^Q stop=^S susp=^Z eol=\0
+ reprint=^R discard=^O werase=^W lnext=^V
+ eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+#include <asm-generic/termios.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_AVR32_TERMIOS_H */
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
new file mode 100644
index 000000000000..d1f5b35ebd54
--- /dev/null
+++ b/include/asm-avr32/thread_info.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_THREAD_INFO_H
+#define __ASM_AVR32_THREAD_INFO_H
+
+#include <asm/page.h>
+
+#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+
+struct task_struct;
+struct exec_domain;
+
+struct thread_info {
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+ unsigned long flags; /* low level flags */
+ __u32 cpu;
+ __s32 preempt_count; /* 0 => preemptable, <0 => BUG */
+ struct restart_block restart_block;
+ __u8 supervisor_stack[0];
+};
+
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = 1, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall \
+ } \
+}
+
+#define init_thread_info (init_thread_union.thread_info)
+#define init_stack (init_thread_union.stack)
+
+/*
+ * Get the thread information struct from C: AND the stack pointer with
+ * ~(THREAD_SIZE - 1), i.e. round it down to the lower end of the stack.
+ */
+static inline struct thread_info *current_thread_info(void)
+{
+ unsigned long addr = ~(THREAD_SIZE - 1);
+
+ asm("and %0, sp" : "=r"(addr) : "0"(addr));
+ return (struct thread_info *)addr;
+}
+
+/* thread information allocation; alloc and free share THREAD_SIZE_ORDER */
+#define alloc_thread_info(ti) \
+ ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
+#define free_thread_info(ti) free_pages((unsigned long)(ti), THREAD_SIZE_ORDER)
+#define get_thread_info(ti) get_task_struct((ti)->task)
+#define put_thread_info(ti) put_task_struct((ti)->task)
+
+#endif /* !__ASSEMBLY__ */
+
+#define PREEMPT_ACTIVE 0x40000000
+
+/*
+ * Thread information flags
+ * - these are process state flags that various assembly files may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
+#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */
+#define TIF_SIGPENDING 2 /* signal pending */
+#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling
+ TIF_NEED_RESCHED */
+#define TIF_BREAKPOINT 5 /* true if we should break after return */
+#define TIF_SINGLE_STEP 6 /* single step after next break */
+#define TIF_MEMDIE 7
+#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal */
+#define TIF_USERSPACE 31 /* true if FS sets userspace */
+
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT)
+#define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP)
+#define _TIF_MEMDIE (1 << TIF_MEMDIE)
+#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
+
+/* XXX: These two masks must never span more than 16 bits! */
+/* work to do on interrupt/exception return: TIF bits 1-5 and 8 */
+#define _TIF_WORK_MASK 0x0000013e
+/* work to do on any return to userspace: TIF bits 0-5 and 8 */
+#define _TIF_ALLWORK_MASK 0x0000013f
+/* work to do on return from debug mode: TIF bits 1-6 and 8 */
+#define _TIF_DBGWORK_MASK 0x0000017e
+
+#endif /* __ASM_AVR32_THREAD_INFO_H */
diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h
new file mode 100644
index 000000000000..5e44ecb3ce0c
--- /dev/null
+++ b/include/asm-avr32/timex.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TIMEX_H
+#define __ASM_AVR32_TIMEX_H
+
+/*
+ * This is the frequency of the timer used for Linux's timer interrupt.
+ * The value should be defined as accurately as possible; otherwise, under
+ * certain circumstances, Linux timekeeping might become inaccurate or fail.
+ *
+ * For many systems the exact clock rate of the timer isn't known, but due
+ * to the way this value is used we can get away with a wrong value as long
+ * as this value is:
+ *
+ * - a multiple of HZ
+ * - a divisor of the actual rate
+ *
+ * 500000 is a good such cheat value.
+ *
+ * The obscure number 1193182 seen elsewhere is the rate of the original i8254
+ * timer in legacy PC hardware; that chip is never found in AVR32 systems.
+ */
+#define CLOCK_TICK_RATE 500000 /* Underlying HZ */
+
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles (void)
+{
+ return 0;
+}
+
+extern int read_current_timer(unsigned long *timer_value);
+#define ARCH_HAS_READ_CURRENT_TIMER 1
+
+#endif /* __ASM_AVR32_TIMEX_H */
diff --git a/include/asm-avr32/tlb.h b/include/asm-avr32/tlb.h
new file mode 100644
index 000000000000..5c55f9ce7c7d
--- /dev/null
+++ b/include/asm-avr32/tlb.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TLB_H
+#define __ASM_AVR32_TLB_H
+
+#define tlb_start_vma(tlb, vma) \
+ flush_cache_range(vma, vma->vm_start, vma->vm_end)
+
+#define tlb_end_vma(tlb, vma) \
+ flush_tlb_range(vma, vma->vm_start, vma->vm_end)
+
+#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0)
+
+/*
+ * Flush whole TLB for MM
+ */
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+/*
+ * For debugging purposes
+ */
+extern void show_dtlb_entry(unsigned int index);
+extern void dump_dtlb(void);
+
+#endif /* __ASM_AVR32_TLB_H */
diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h
new file mode 100644
index 000000000000..730e268f81f3
--- /dev/null
+++ b/include/asm-avr32/tlbflush.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TLBFLUSH_H
+#define __ASM_AVR32_TLBFLUSH_H
+
+#include <asm/mmu.h>
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes' TLB entries
+ * - flush_tlb_mm(mm) flushes the specified mm context TLBs
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ */
+extern void flush_tlb(void);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void __flush_tlb_page(unsigned long asid, unsigned long page);
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ /* Nothing to do */
+}
+
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+#endif /* __ASM_AVR32_TLBFLUSH_H */
diff --git a/include/asm-avr32/topology.h b/include/asm-avr32/topology.h
new file mode 100644
index 000000000000..5b766cbb4806
--- /dev/null
+++ b/include/asm-avr32/topology.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_TOPOLOGY_H
+#define __ASM_AVR32_TOPOLOGY_H
+
+#include <asm-generic/topology.h>
+
+#endif /* __ASM_AVR32_TOPOLOGY_H */
diff --git a/include/asm-avr32/traps.h b/include/asm-avr32/traps.h
new file mode 100644
index 000000000000..6a8fb944f414
--- /dev/null
+++ b/include/asm-avr32/traps.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TRAPS_H
+#define __ASM_AVR32_TRAPS_H
+
+#include <linux/list.h>
+
+struct undef_hook {
+ struct list_head node;
+ u32 insn_mask;
+ u32 insn_val;
+ int (*fn)(struct pt_regs *regs, u32 insn);
+};
+
+void register_undef_hook(struct undef_hook *hook);
+void unregister_undef_hook(struct undef_hook *hook);
+
+#endif /* __ASM_AVR32_TRAPS_H */
diff --git a/include/asm-avr32/types.h b/include/asm-avr32/types.h
new file mode 100644
index 000000000000..3f47db9675af
--- /dev/null
+++ b/include/asm-avr32/types.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TYPES_H
+#define __ASM_AVR32_TYPES_H
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+
+#define BITS_PER_LONG 32
+
+#ifndef __ASSEMBLY__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+/* Dma addresses are 32-bits wide. */
+
+typedef u32 dma_addr_t;
+
+#ifdef CONFIG_LBD
+typedef u64 sector_t;
+#define HAVE_SECTOR_T
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+
+#endif /* __ASM_AVR32_TYPES_H */
diff --git a/include/asm-avr32/uaccess.h b/include/asm-avr32/uaccess.h
new file mode 100644
index 000000000000..821deb5a9d28
--- /dev/null
+++ b/include/asm-avr32/uaccess.h
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_UACCESS_H
+#define __ASM_AVR32_UACCESS_H
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+typedef struct {
+ unsigned int is_user_space;
+} mm_segment_t;
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons (Data Segment Register?), these macros are misnamed.
+ */
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+#define segment_eq(a,b) ((a).is_user_space == (b).is_user_space)
+
+#define USER_ADDR_LIMIT 0x80000000
+
+#define KERNEL_DS MAKE_MM_SEG(0)
+#define USER_DS MAKE_MM_SEG(1)
+
+#define get_ds() (KERNEL_DS)
+
+static inline mm_segment_t get_fs(void)
+{
+ return MAKE_MM_SEG(test_thread_flag(TIF_USERSPACE));
+}
+
+static inline void set_fs(mm_segment_t s)
+{
+ if (s.is_user_space)
+ set_thread_flag(TIF_USERSPACE);
+ else
+ clear_thread_flag(TIF_USERSPACE);
+}
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ *
+ * We do the following checks:
+ * 1. Is the access from kernel space (TIF_USERSPACE clear)?
+ * 2. Is addr >= 0x80000000, or size > 0x80000000?
+ * 3. Is (addr + size) > 0x80000000?
+ *
+ * If yes on the first check, access is granted.
+ * If yes on any of the others, access is denied.
+ */
+#define __range_ok(addr, size) \
+ (test_thread_flag(TIF_USERSPACE) \
+ && (((unsigned long)(addr) >= 0x80000000) \
+ || ((unsigned long)(size) > 0x80000000) \
+ || (((unsigned long)(addr) + (unsigned long)(size)) > 0x80000000)))
+
+#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0))
+
+static inline int
+verify_area(int type, const void __user *addr, unsigned long size)
+{
+ return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
+
+/* Generic arbitrary sized copy. Return the number of bytes NOT copied */
+extern __kernel_size_t __copy_user(void *to, const void *from,
+ __kernel_size_t n);
+
+extern __kernel_size_t copy_to_user(void __user *to, const void *from,
+ __kernel_size_t n);
+extern __kernel_size_t copy_from_user(void *to, const void __user *from,
+ __kernel_size_t n);
+
+static inline __kernel_size_t __copy_to_user(void __user *to, const void *from,
+ __kernel_size_t n)
+{
+ return __copy_user((void __force *)to, from, n);
+}
+static inline __kernel_size_t __copy_from_user(void *to,
+ const void __user *from,
+ __kernel_size_t n)
+{
+ return __copy_user(to, (const void __force *)from, n);
+}
+
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+/*
+ * put_user: - Write a simple value into user space.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define put_user(x,ptr) \
+ __put_user_check((x),(ptr),sizeof(*(ptr)))
+
+/*
+ * get_user: - Get a simple variable from user space.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define get_user(x,ptr) \
+ __get_user_check((x),(ptr),sizeof(*(ptr)))
+
+/*
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define __put_user(x,ptr) \
+ __put_user_nocheck((x),(ptr),sizeof(*(ptr)))
+
+/*
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define __get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+
+extern int __get_user_bad(void);
+extern int __put_user_bad(void);
+
+#define __get_user_nocheck(x, ptr, size) \
+({ \
+ typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \
+ int __gu_err = 0; \
+ \
+ switch (size) { \
+ case 1: __get_user_asm("ub", __gu_val, ptr, __gu_err); break; \
+ case 2: __get_user_asm("uh", __gu_val, ptr, __gu_err); break; \
+ case 4: __get_user_asm("w", __gu_val, ptr, __gu_err); break; \
+ case 8: __get_user_asm("d", __gu_val, ptr, __gu_err); break; \
+ default: __gu_err = __get_user_bad(); break; \
+ } \
+ \
+ x = __gu_val; \
+ __gu_err; \
+})
+
+#define __get_user_check(x, ptr, size) \
+({ \
+ typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \
+ const typeof(*(ptr)) __user * __gu_addr = (ptr); \
+ int __gu_err = 0; \
+ \
+ if (access_ok(VERIFY_READ, __gu_addr, size)) { \
+ switch (size) { \
+ case 1: \
+ __get_user_asm("ub", __gu_val, __gu_addr, \
+ __gu_err); \
+ break; \
+ case 2: \
+ __get_user_asm("uh", __gu_val, __gu_addr, \
+ __gu_err); \
+ break; \
+ case 4: \
+ __get_user_asm("w", __gu_val, __gu_addr, \
+ __gu_err); \
+ break; \
+ case 8: \
+ __get_user_asm("d", __gu_val, __gu_addr, \
+ __gu_err); \
+ break; \
+ default: \
+ __gu_err = __get_user_bad(); \
+ break; \
+ } \
+ } else { \
+ __gu_err = -EFAULT; \
+ } \
+ x = __gu_val; \
+ __gu_err; \
+})
+
+#define __get_user_asm(suffix, __gu_val, ptr, __gu_err) \
+ asm volatile( \
+ "1: ld." suffix " %1, %3 \n" \
+ "2: \n" \
+ " .section .fixup, \"ax\" \n" \
+ "3: mov %0, %4 \n" \
+ " rjmp 2b \n" \
+ " .previous \n" \
+ " .section __ex_table, \"a\" \n" \
+ " .long 1b, 3b \n" \
+ " .previous \n" \
+ : "=r"(__gu_err), "=r"(__gu_val) \
+ : "0"(__gu_err), "m"(*(ptr)), "i"(-EFAULT))
+
+#define __put_user_nocheck(x, ptr, size) \
+({ \
+ typeof(*(ptr)) __pu_val; \
+ int __pu_err = 0; \
+ \
+ __pu_val = (x); \
+ switch (size) { \
+ case 1: __put_user_asm("b", ptr, __pu_val, __pu_err); break; \
+ case 2: __put_user_asm("h", ptr, __pu_val, __pu_err); break; \
+ case 4: __put_user_asm("w", ptr, __pu_val, __pu_err); break; \
+ case 8: __put_user_asm("d", ptr, __pu_val, __pu_err); break; \
+ default: __pu_err = __put_user_bad(); break; \
+ } \
+ __pu_err; \
+})
+
+#define __put_user_check(x, ptr, size) \
+({ \
+ typeof(*(ptr)) __pu_val; \
+ typeof(*(ptr)) __user *__pu_addr = (ptr); \
+ int __pu_err = 0; \
+ \
+ __pu_val = (x); \
+ if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \
+ switch (size) { \
+ case 1: \
+ __put_user_asm("b", __pu_addr, __pu_val, \
+ __pu_err); \
+ break; \
+ case 2: \
+ __put_user_asm("h", __pu_addr, __pu_val, \
+ __pu_err); \
+ break; \
+ case 4: \
+ __put_user_asm("w", __pu_addr, __pu_val, \
+ __pu_err); \
+ break; \
+ case 8: \
+ __put_user_asm("d", __pu_addr, __pu_val, \
+ __pu_err); \
+ break; \
+ default: \
+ __pu_err = __put_user_bad(); \
+ break; \
+ } \
+ } else { \
+ __pu_err = -EFAULT; \
+ } \
+ __pu_err; \
+})
+
+#define __put_user_asm(suffix, ptr, __pu_val, __gu_err) \
+ asm volatile( \
+ "1: st." suffix " %1, %3 \n" \
+ "2: \n" \
+ " .section .fixup, \"ax\" \n" \
+ "3: mov %0, %4 \n" \
+ " rjmp 2b \n" \
+ " .previous \n" \
+ " .section __ex_table, \"a\" \n" \
+ " .long 1b, 3b \n" \
+ " .previous \n" \
+ : "=r"(__gu_err), "=m"(*(ptr)) \
+ : "0"(__gu_err), "r"(__pu_val), "i"(-EFAULT))
+
+extern __kernel_size_t clear_user(void __user *addr, __kernel_size_t size);
+extern __kernel_size_t __clear_user(void __user *addr, __kernel_size_t size);
+
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern long __strncpy_from_user(char *dst, const char __user *src, long count);
+
+extern long strnlen_user(const char __user *__s, long __n);
+extern long __strnlen_user(const char __user *__s, long __n);
+
+#define strlen_user(s) strnlen_user(s, ~0UL >> 1)
+
+struct exception_table_entry
+{
+ unsigned long insn, fixup;
+};
+
+#endif /* __ASM_AVR32_UACCESS_H */
diff --git a/include/asm-avr32/ucontext.h b/include/asm-avr32/ucontext.h
new file mode 100644
index 000000000000..ac7259c2a799
--- /dev/null
+++ b/include/asm-avr32/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_AVR32_UCONTEXT_H
+#define __ASM_AVR32_UCONTEXT_H
+
+struct ucontext {
+ unsigned long uc_flags;
+ struct ucontext * uc_link;
+ stack_t uc_stack;
+ struct sigcontext uc_mcontext;
+ sigset_t uc_sigmask;
+};
+
+#endif /* __ASM_AVR32_UCONTEXT_H */
diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h
new file mode 100644
index 000000000000..3042723fcbfd
--- /dev/null
+++ b/include/asm-avr32/unaligned.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_AVR32_UNALIGNED_H
+#define __ASM_AVR32_UNALIGNED_H
+
+/*
+ * AVR32 can handle some unaligned accesses, depending on the
+ * implementation. The AVR32 AP implementation can handle unaligned
+ * words, but halfwords must be halfword-aligned, and doublewords must
+ * be word-aligned.
+ *
+ * TODO: Make all this CPU-specific and optimize.
+ */
+
+#include <linux/string.h>
+
+/* Use memmove here, so gcc does not insert a __builtin_memcpy. */
+
+#define get_unaligned(ptr) \
+ ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; })
+
+#define put_unaligned(val, ptr) \
+ ({ __typeof__(*(ptr)) __tmp = (val); \
+ memmove((ptr), &__tmp, sizeof(*(ptr))); \
+ (void)0; })
+
+#endif /* __ASM_AVR32_UNALIGNED_H */
diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h
new file mode 100644
index 000000000000..1f528f92690d
--- /dev/null
+++ b/include/asm-avr32/unistd.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_UNISTD_H
+#define __ASM_AVR32_UNISTD_H
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_restart_syscall 0
+#define __NR_exit 1
+#define __NR_fork 2
+#define __NR_read 3
+#define __NR_write 4
+#define __NR_open 5
+#define __NR_close 6
+#define __NR_umask 7
+#define __NR_creat 8
+#define __NR_link 9
+#define __NR_unlink 10
+#define __NR_execve 11
+#define __NR_chdir 12
+#define __NR_time 13
+#define __NR_mknod 14
+#define __NR_chmod 15
+#define __NR_chown 16
+#define __NR_lchown 17
+#define __NR_lseek 18
+#define __NR__llseek 19
+#define __NR_getpid 20
+#define __NR_mount 21
+#define __NR_umount2 22
+#define __NR_setuid 23
+#define __NR_getuid 24
+#define __NR_stime 25
+#define __NR_ptrace 26
+#define __NR_alarm 27
+#define __NR_pause 28
+#define __NR_utime 29
+#define __NR_stat 30
+#define __NR_fstat 31
+#define __NR_lstat 32
+#define __NR_access 33
+#define __NR_chroot 34
+#define __NR_sync 35
+#define __NR_fsync 36
+#define __NR_kill 37
+#define __NR_rename 38
+#define __NR_mkdir 39
+#define __NR_rmdir 40
+#define __NR_dup 41
+#define __NR_pipe 42
+#define __NR_times 43
+#define __NR_clone 44
+#define __NR_brk 45
+#define __NR_setgid 46
+#define __NR_getgid 47
+#define __NR_getcwd 48
+#define __NR_geteuid 49
+#define __NR_getegid 50
+#define __NR_acct 51
+#define __NR_setfsuid 52
+#define __NR_setfsgid 53
+#define __NR_ioctl 54
+#define __NR_fcntl 55
+#define __NR_setpgid 56
+#define __NR_mremap 57
+#define __NR_setresuid 58
+#define __NR_getresuid 59
+#define __NR_setreuid 60
+#define __NR_setregid 61
+#define __NR_ustat 62
+#define __NR_dup2 63
+#define __NR_getppid 64
+#define __NR_getpgrp 65
+#define __NR_setsid 66
+#define __NR_rt_sigaction 67
+#define __NR_rt_sigreturn 68
+#define __NR_rt_sigprocmask 69
+#define __NR_rt_sigpending 70
+#define __NR_rt_sigtimedwait 71
+#define __NR_rt_sigqueueinfo 72
+#define __NR_rt_sigsuspend 73
+#define __NR_sethostname 74
+#define __NR_setrlimit 75
+#define __NR_getrlimit 76 /* SuS compliant getrlimit */
+#define __NR_getrusage 77
+#define __NR_gettimeofday 78
+#define __NR_settimeofday 79
+#define __NR_getgroups 80
+#define __NR_setgroups 81
+#define __NR_select 82
+#define __NR_symlink 83
+#define __NR_fchdir 84
+#define __NR_readlink 85
+#define __NR_pread 86
+#define __NR_pwrite 87
+#define __NR_swapon 88
+#define __NR_reboot 89
+#define __NR_mmap2 90
+#define __NR_munmap 91
+#define __NR_truncate 92
+#define __NR_ftruncate 93
+#define __NR_fchmod 94
+#define __NR_fchown 95
+#define __NR_getpriority 96
+#define __NR_setpriority 97
+#define __NR_wait4 98
+#define __NR_statfs 99
+#define __NR_fstatfs 100
+#define __NR_vhangup 101
+#define __NR_sigaltstack 102
+#define __NR_syslog 103
+#define __NR_setitimer 104
+#define __NR_getitimer 105
+#define __NR_swapoff 106
+#define __NR_sysinfo 107
+#define __NR_ipc 108
+#define __NR_sendfile 109
+#define __NR_setdomainname 110
+#define __NR_uname 111
+#define __NR_adjtimex 112
+#define __NR_mprotect 113
+#define __NR_vfork 114
+#define __NR_init_module 115
+#define __NR_delete_module 116
+#define __NR_quotactl 117
+#define __NR_getpgid 118
+#define __NR_bdflush 119
+#define __NR_sysfs 120
+#define __NR_personality 121
+#define __NR_afs_syscall 122 /* Syscall for Andrew File System */
+#define __NR_getdents 123
+#define __NR_flock 124
+#define __NR_msync 125
+#define __NR_readv 126
+#define __NR_writev 127
+#define __NR_getsid 128
+#define __NR_fdatasync 129
+#define __NR__sysctl 130
+#define __NR_mlock 131
+#define __NR_munlock 132
+#define __NR_mlockall 133
+#define __NR_munlockall 134
+#define __NR_sched_setparam 135
+#define __NR_sched_getparam 136
+#define __NR_sched_setscheduler 137
+#define __NR_sched_getscheduler 138
+#define __NR_sched_yield 139
+#define __NR_sched_get_priority_max 140
+#define __NR_sched_get_priority_min 141
+#define __NR_sched_rr_get_interval 142
+#define __NR_nanosleep 143
+#define __NR_poll 144
+#define __NR_nfsservctl 145
+#define __NR_setresgid 146
+#define __NR_getresgid 147
+#define __NR_prctl 148
+#define __NR_socket 149
+#define __NR_bind 150
+#define __NR_connect 151
+#define __NR_listen 152
+#define __NR_accept 153
+#define __NR_getsockname 154
+#define __NR_getpeername 155
+#define __NR_socketpair 156
+#define __NR_send 157
+#define __NR_recv 158
+#define __NR_sendto 159
+#define __NR_recvfrom 160
+#define __NR_shutdown 161
+#define __NR_setsockopt 162
+#define __NR_getsockopt 163
+#define __NR_sendmsg 164
+#define __NR_recvmsg 165
+#define __NR_truncate64 166
+#define __NR_ftruncate64 167
+#define __NR_stat64 168
+#define __NR_lstat64 169
+#define __NR_fstat64 170
+#define __NR_pivot_root 171
+#define __NR_mincore 172
+#define __NR_madvise 173
+#define __NR_getdents64 174
+#define __NR_fcntl64 175
+#define __NR_gettid 176
+#define __NR_readahead 177
+#define __NR_setxattr 178
+#define __NR_lsetxattr 179
+#define __NR_fsetxattr 180
+#define __NR_getxattr 181
+#define __NR_lgetxattr 182
+#define __NR_fgetxattr 183
+#define __NR_listxattr 184
+#define __NR_llistxattr 185
+#define __NR_flistxattr 186
+#define __NR_removexattr 187
+#define __NR_lremovexattr 188
+#define __NR_fremovexattr 189
+#define __NR_tkill 190
+#define __NR_sendfile64 191
+#define __NR_futex 192
+#define __NR_sched_setaffinity 193
+#define __NR_sched_getaffinity 194
+#define __NR_capget 195
+#define __NR_capset 196
+#define __NR_io_setup 197
+#define __NR_io_destroy 198
+#define __NR_io_getevents 199
+#define __NR_io_submit 200
+#define __NR_io_cancel 201
+#define __NR_fadvise64 202
+#define __NR_exit_group 203
+#define __NR_lookup_dcookie 204
+#define __NR_epoll_create 205
+#define __NR_epoll_ctl 206
+#define __NR_epoll_wait 207
+#define __NR_remap_file_pages 208
+#define __NR_set_tid_address 209
+
+#define __NR_timer_create 210
+#define __NR_timer_settime 211
+#define __NR_timer_gettime 212
+#define __NR_timer_getoverrun 213
+#define __NR_timer_delete 214
+#define __NR_clock_settime 215
+#define __NR_clock_gettime 216
+#define __NR_clock_getres 217
+#define __NR_clock_nanosleep 218
+#define __NR_statfs64 219
+#define __NR_fstatfs64 220
+#define __NR_tgkill 221
+ /* 222 reserved for tux */
+#define __NR_utimes 223
+#define __NR_fadvise64_64 224
+
+#define __NR_cacheflush 225
+
+#define __NR_vserver 226
+#define __NR_mq_open 227
+#define __NR_mq_unlink 228
+#define __NR_mq_timedsend 229
+#define __NR_mq_timedreceive 230
+#define __NR_mq_notify 231
+#define __NR_mq_getsetattr 232
+#define __NR_kexec_load 233
+#define __NR_waitid 234
+#define __NR_add_key 235
+#define __NR_request_key 236
+#define __NR_keyctl 237
+#define __NR_ioprio_set 238
+#define __NR_ioprio_get 239
+#define __NR_inotify_init 240
+#define __NR_inotify_add_watch 241
+#define __NR_inotify_rm_watch 242
+#define __NR_openat 243
+#define __NR_mkdirat 244
+#define __NR_mknodat 245
+#define __NR_fchownat 246
+#define __NR_futimesat 247
+#define __NR_fstatat64 248
+#define __NR_unlinkat 249
+#define __NR_renameat 250
+#define __NR_linkat 251
+#define __NR_symlinkat 252
+#define __NR_readlinkat 253
+#define __NR_fchmodat 254
+#define __NR_faccessat 255
+#define __NR_pselect6 256
+#define __NR_ppoll 257
+#define __NR_unshare 258
+#define __NR_set_robust_list 259
+#define __NR_get_robust_list 260
+#define __NR_splice 261
+#define __NR_sync_file_range 262
+#define __NR_tee 263
+#define __NR_vmsplice 264
+
+#define NR_syscalls 265
+
+
+/*
+ * AVR32 calling convention for system calls:
+ * - System call number in r8
+ * - Parameters in r12 and downwards to r9 as well as r6 and r5.
+ * - Return value in r12
+ */
+
+/*
+ * user-visible error numbers are in the range -1 - -124: see
+ * <asm-generic/errno.h>
+ */
+
+#define __syscall_return(type, res) do { \
+ if ((unsigned long)(res) >= (unsigned long)(-125)) { \
+ errno = -(res); \
+ res = -1; \
+ } \
+ return (type) (res); \
+ } while (0)
+
+#ifdef __KERNEL__
+/* __ARCH_WANT_* switches; presumably read by generic syscall code - confirm */
+#define __ARCH_WANT_IPC_PARSE_VERSION
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_WAITPID
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#endif
+
+#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__)
+
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <asm/signal.h>
+
+struct pt_regs;
+
+/*
+ * we need this inline - forking from kernel space will result
+ * in NO COPY ON WRITE (!!!), until an execve is executed. This
+ * is no problem, but for the stack. This is handled by not letting
+ * main() use the stack at all after fork(). Thus, no function
+ * calls - which means inline code for fork too, as otherwise we
+ * would use the stack upon exit from 'fork()'.
+ *
+ * Actually only pause and fork are needed inline, so that there
+ * won't be any messing with the stack from main(), but we define
+ * some others too.
+ */
+static inline int execve(const char *file, char **argv, char **envp)
+{
+ register long scno asm("r8") = __NR_execve;
+ register long sc1 asm("r12") = (long)file;
+ register long sc2 asm("r11") = (long)argv;
+ register long sc3 asm("r10") = (long)envp;
+ int res;
+
+ asm volatile("scall"
+ : "=r"(sc1)
+ : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
+ : "lr", "memory");
+ res = sc1;
+ __syscall_return(int, res);
+}
+
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ struct pt_regs *regs);
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
+asmlinkage int sys_pipe(unsigned long __user *filedes);
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, off_t offset);
+asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len);
+asmlinkage int sys_fork(struct pt_regs *regs);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+ unsigned long parent_tidptr,
+ unsigned long child_tidptr, struct pt_regs *regs);
+asmlinkage int sys_vfork(struct pt_regs *regs);
+asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
+ char __user *__user *uenvp, struct pt_regs *regs);
+
+#endif
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall");
+
+#endif /* __ASM_AVR32_UNISTD_H */
diff --git a/include/asm-avr32/user.h b/include/asm-avr32/user.h
new file mode 100644
index 000000000000..060fb3acee49
--- /dev/null
+++ b/include/asm-avr32/user.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Note: We may not need these definitions for AVR32, as we don't
+ * support a.out.
+ */
+#ifndef __ASM_AVR32_USER_H
+#define __ASM_AVR32_USER_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+
+/*
+ * Core file format: The core file is written in such a way that gdb
+ * can understand it and provide useful information to the user (under
+ * linux we use the `trad-core' bfd). The file contents are as follows:
+ *
+ * upage: 1 page consisting of a user struct that tells gdb
+ * what is present in the file. Directly after this is a
+ * copy of the task_struct, which is currently not used by gdb,
+ * but it may come in handy at some point. All of the registers
+ * are stored as part of the upage. The upage should always be
+ * only one page long.
+ * data: The data segment follows next. We use current->end_text to
+ * current->brk to pick up all of the user variables, plus any memory
+ * that may have been sbrk'ed. No attempt is made to determine if a
+ * page is demand-zero or if a page is totally unused, we just cover
+ * the entire range. All of the addresses are rounded in such a way
+ * that an integral number of pages is written.
+ * stack: We need the stack information in order to get a meaningful
+ * backtrace. We need to write the data from usp to
+ * current->start_stack, so we round each of these in order to be able
+ * to write an integer number of pages.
+ */
+
+struct user_fpu_struct {
+ /* We have no FPU (yet) */
+};
+
+struct user {
+ struct pt_regs regs; /* entire machine state */
+ size_t u_tsize; /* text size (pages) */
+ size_t u_dsize; /* data size (pages) */
+ size_t u_ssize; /* stack size (pages) */
+ unsigned long start_code; /* text starting address */
+ unsigned long start_data; /* data starting address */
+ unsigned long start_stack; /* stack starting address */
+ long int signal; /* signal causing core dump */
+ struct regs * u_ar0; /* help gdb find registers */
+ unsigned long magic; /* identifies a core file */
+ char u_comm[32]; /* user command name */
+};
+
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR (u.start_code)
+#define HOST_DATA_START_ADDR (u.start_data)
+#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
+
+#endif /* __ASM_AVR32_USER_H */
diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h
index 5d76c1c0d6c9..c94a7107019c 100644
--- a/include/asm-cris/pgtable.h
+++ b/include/asm-cris/pgtable.h
@@ -253,7 +253,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
{ pmd_val(*pmdp) = _PAGE_TABLE | (unsigned long) ptep; }
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
/* to find an entry in a page-table-directory. */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -271,7 +271,7 @@ static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
#define __pte_offset(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address))
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index 980ae1b0cd28..1f70d47148bd 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -157,23 +157,105 @@ static inline int __test_bit(int nr, const volatile void * addr)
__constant_test_bit((nr),(addr)) : \
__test_bit((nr),(addr)))
-#include <asm-generic/bitops/ffs.h>
-#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/find.h>
-/*
- * fls: find last bit set.
+/**
+ * fls - find last bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs:
+ * - return 32..1 to indicate bit 31..0 most significant bit set
+ * - return 0 to indicate no bits set
*/
#define fls(x) \
({ \
int bit; \
\
- asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x)); \
+ asm(" subcc %1,gr0,gr0,icc0 \n" \
+ " ckne icc0,cc4 \n" \
+ " cscan.p %1,gr0,%0 ,cc4,#1 \n" \
+ " csub %0,%0,%0 ,cc4,#0 \n" \
+ " csub %2,%0,%0 ,cc4,#1 \n" \
+ : "=&r"(bit) \
+ : "r"(x), "r"(32) \
+ : "icc0", "cc4" \
+ ); \
\
- bit ? 33 - bit : bit; \
+ bit; \
})
-#include <asm-generic/bitops/fls64.h>
+/**
+ * fls64 - find last bit set in a 64-bit value
+ * @n: the value to search
+ *
+ * This is defined the same way as ffs:
+ * - return 64..1 to indicate bit 63..0 most significant bit set
+ * - return 0 to indicate no bits set
+ */
+static inline __attribute__((const))
+int fls64(u64 n)
+{
+ union {
+ u64 ll;
+ struct { u32 h, l; };
+ } _;
+ int bit, x, y;
+
+ _.ll = n;
+
+ asm(" subcc.p %3,gr0,gr0,icc0 \n"
+ " subcc %4,gr0,gr0,icc1 \n"
+ " ckne icc0,cc4 \n"
+ " ckne icc1,cc5 \n"
+ " norcr cc4,cc5,cc6 \n"
+ " csub.p %0,%0,%0 ,cc6,1 \n"
+ " orcr cc5,cc4,cc4 \n"
+ " andcr cc4,cc5,cc4 \n"
+ " cscan.p %3,gr0,%0 ,cc4,0 \n"
+ " setlos #64,%1 \n"
+ " cscan.p %4,gr0,%0 ,cc4,1 \n"
+ " setlos #32,%2 \n"
+ " csub.p %1,%0,%0 ,cc4,0 \n"
+ " csub %2,%0,%0 ,cc4,1 \n"
+ : "=&r"(bit), "=r"(x), "=r"(y)
+ : "0r"(_.h), "r"(_.l)
+ : "icc0", "icc1", "cc4", "cc5", "cc6"
+ );
+ return bit;
+
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * - return 32..1 to indicate bit 31..0 least significant bit set
+ * - return 0 to indicate no bits set
+ */
+static inline __attribute__((const))
+int ffs(int x)
+{
+ /* Note: (x & -x) gives us a mask that is the least significant
+ * (rightmost) 1-bit of the value in x.
+ */
+ return fls(x & -x);
+}
+
+/**
+ * __ffs - find first bit set
+ * @x: the word to search
+ *
+ * - return 31..0 to indicate bit 31..0 least significant bit set
+ * - if no bits are set in x, the result is undefined
+ */
+static inline __attribute__((const))
+int __ffs(unsigned long x)
+{
+ int bit;
+ asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x & -x));
+ return 31 - bit;
+}
+
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-frv/cpu-irqs.h b/include/asm-frv/cpu-irqs.h
index 5cd691e1f8c4..478f3498fcfe 100644
--- a/include/asm-frv/cpu-irqs.h
+++ b/include/asm-frv/cpu-irqs.h
@@ -14,36 +14,6 @@
#ifndef __ASSEMBLY__
-#include <asm/irq-routing.h>
-
-#define IRQ_BASE_CPU (NR_IRQ_ACTIONS_PER_GROUP * 0)
-
-/* IRQ IDs presented to drivers */
-enum {
- IRQ_CPU__UNUSED = IRQ_BASE_CPU,
- IRQ_CPU_UART0,
- IRQ_CPU_UART1,
- IRQ_CPU_TIMER0,
- IRQ_CPU_TIMER1,
- IRQ_CPU_TIMER2,
- IRQ_CPU_DMA0,
- IRQ_CPU_DMA1,
- IRQ_CPU_DMA2,
- IRQ_CPU_DMA3,
- IRQ_CPU_DMA4,
- IRQ_CPU_DMA5,
- IRQ_CPU_DMA6,
- IRQ_CPU_DMA7,
- IRQ_CPU_EXTERNAL0,
- IRQ_CPU_EXTERNAL1,
- IRQ_CPU_EXTERNAL2,
- IRQ_CPU_EXTERNAL3,
- IRQ_CPU_EXTERNAL4,
- IRQ_CPU_EXTERNAL5,
- IRQ_CPU_EXTERNAL6,
- IRQ_CPU_EXTERNAL7,
-};
-
/* IRQ to level mappings */
#define IRQ_GDBSTUB_LEVEL 15
#define IRQ_UART_LEVEL 13
@@ -82,6 +52,30 @@ enum {
#define IRQ_XIRQ6_LEVEL 7
#define IRQ_XIRQ7_LEVEL 8
+/* IRQ IDs presented to drivers */
+#define IRQ_CPU__UNUSED IRQ_BASE_CPU
+#define IRQ_CPU_UART0 (IRQ_BASE_CPU + IRQ_UART0_LEVEL)
+#define IRQ_CPU_UART1 (IRQ_BASE_CPU + IRQ_UART1_LEVEL)
+#define IRQ_CPU_TIMER0 (IRQ_BASE_CPU + IRQ_TIMER0_LEVEL)
+#define IRQ_CPU_TIMER1 (IRQ_BASE_CPU + IRQ_TIMER1_LEVEL)
+#define IRQ_CPU_TIMER2 (IRQ_BASE_CPU + IRQ_TIMER2_LEVEL)
+#define IRQ_CPU_DMA0 (IRQ_BASE_CPU + IRQ_DMA0_LEVEL)
+#define IRQ_CPU_DMA1 (IRQ_BASE_CPU + IRQ_DMA1_LEVEL)
+#define IRQ_CPU_DMA2 (IRQ_BASE_CPU + IRQ_DMA2_LEVEL)
+#define IRQ_CPU_DMA3 (IRQ_BASE_CPU + IRQ_DMA3_LEVEL)
+#define IRQ_CPU_DMA4 (IRQ_BASE_CPU + IRQ_DMA4_LEVEL)
+#define IRQ_CPU_DMA5 (IRQ_BASE_CPU + IRQ_DMA5_LEVEL)
+#define IRQ_CPU_DMA6 (IRQ_BASE_CPU + IRQ_DMA6_LEVEL)
+#define IRQ_CPU_DMA7 (IRQ_BASE_CPU + IRQ_DMA7_LEVEL)
+#define IRQ_CPU_EXTERNAL0 (IRQ_BASE_CPU + IRQ_XIRQ0_LEVEL)
+#define IRQ_CPU_EXTERNAL1 (IRQ_BASE_CPU + IRQ_XIRQ1_LEVEL)
+#define IRQ_CPU_EXTERNAL2 (IRQ_BASE_CPU + IRQ_XIRQ2_LEVEL)
+#define IRQ_CPU_EXTERNAL3 (IRQ_BASE_CPU + IRQ_XIRQ3_LEVEL)
+#define IRQ_CPU_EXTERNAL4 (IRQ_BASE_CPU + IRQ_XIRQ4_LEVEL)
+#define IRQ_CPU_EXTERNAL5 (IRQ_BASE_CPU + IRQ_XIRQ5_LEVEL)
+#define IRQ_CPU_EXTERNAL6 (IRQ_BASE_CPU + IRQ_XIRQ6_LEVEL)
+#define IRQ_CPU_EXTERNAL7 (IRQ_BASE_CPU + IRQ_XIRQ7_LEVEL)
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_CPU_IRQS_H */
diff --git a/include/asm-frv/hardirq.h b/include/asm-frv/hardirq.h
index 7581b5a7559a..fc47515822a2 100644
--- a/include/asm-frv/hardirq.h
+++ b/include/asm-frv/hardirq.h
@@ -26,5 +26,10 @@ typedef struct {
#error SMP not available on FR-V
#endif /* CONFIG_SMP */
+extern atomic_t irq_err_count;
+static inline void ack_bad_irq(int irq)
+{
+ atomic_inc(&irq_err_count);
+}
#endif
diff --git a/include/asm-frv/irq-routing.h b/include/asm-frv/irq-routing.h
deleted file mode 100644
index ac3ab900a1dc..000000000000
--- a/include/asm-frv/irq-routing.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* irq-routing.h: multiplexed IRQ routing
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_IRQ_ROUTING_H
-#define _ASM_IRQ_ROUTING_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/spinlock.h>
-#include <asm/irq.h>
-
-struct irq_source;
-struct irq_level;
-
-/*
- * IRQ action distribution sets
- */
-struct irq_group {
- int first_irq; /* first IRQ distributed here */
- void (*control)(struct irq_group *group, int index, int on);
-
- struct irqaction *actions[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ action chains */
- struct irq_source *sources[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ sources */
- int disable_cnt[NR_IRQ_ACTIONS_PER_GROUP]; /* disable counts */
-};
-
-/*
- * IRQ source manager
- */
-struct irq_source {
- struct irq_source *next;
- struct irq_level *level;
- const char *muxname;
- volatile void __iomem *muxdata;
- unsigned long irqmask;
-
- void (*doirq)(struct irq_source *source);
-};
-
-/*
- * IRQ level management (per CPU IRQ priority / entry vector)
- */
-struct irq_level {
- int usage;
- int disable_count;
- unsigned long flags; /* current IRQF_DISABLED and IRQF_SHARED settings */
- spinlock_t lock;
- struct irq_source *sources;
-};
-
-extern struct irq_level frv_irq_levels[16];
-extern struct irq_group *irq_groups[NR_IRQ_GROUPS];
-
-extern void frv_irq_route(struct irq_source *source, int irqlevel);
-extern void frv_irq_route_external(struct irq_source *source, int irq);
-extern void frv_irq_set_group(struct irq_group *group);
-extern void distribute_irqs(struct irq_group *group, unsigned long irqmask);
-extern void route_cpu_irqs(void);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_IRQ_ROUTING_H */
diff --git a/include/asm-frv/irq.h b/include/asm-frv/irq.h
index 58b619215a50..8fefd6b827aa 100644
--- a/include/asm-frv/irq.h
+++ b/include/asm-frv/irq.h
@@ -1,6 +1,6 @@
/* irq.h: FRV IRQ definitions
*
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -12,32 +12,22 @@
#ifndef _ASM_IRQ_H_
#define _ASM_IRQ_H_
-
-/*
- * the system has an on-CPU PIC and another PIC on the FPGA and other PICs on other peripherals,
- * so we do some routing in irq-routing.[ch] to reduce the number of false-positives seen by
- * drivers
- */
-
/* this number is used when no interrupt has been assigned */
#define NO_IRQ (-1)
-#define NR_IRQ_LOG2_ACTIONS_PER_GROUP 5
-#define NR_IRQ_ACTIONS_PER_GROUP (1 << NR_IRQ_LOG2_ACTIONS_PER_GROUP)
-#define NR_IRQ_GROUPS 4
-#define NR_IRQS (NR_IRQ_ACTIONS_PER_GROUP * NR_IRQ_GROUPS)
+#define NR_IRQS 48
+#define IRQ_BASE_CPU (0 * 16)
+#define IRQ_BASE_FPGA (1 * 16)
+#define IRQ_BASE_MB93493 (2 * 16)
/* probe returns a 32-bit IRQ mask:-/ */
-#define MIN_PROBE_IRQ (NR_IRQS - 32)
+#define MIN_PROBE_IRQ (NR_IRQS - 32)
+#ifndef __ASSEMBLY__
static inline int irq_canonicalize(int irq)
{
return irq;
}
-
-extern void disable_irq_nosync(unsigned int irq);
-extern void disable_irq(unsigned int irq);
-extern void enable_irq(unsigned int irq);
-
+#endif
#endif /* _ASM_IRQ_H_ */
diff --git a/include/asm-frv/mb93091-fpga-irqs.h b/include/asm-frv/mb93091-fpga-irqs.h
index 341bfc52a0eb..19778c5ba9d6 100644
--- a/include/asm-frv/mb93091-fpga-irqs.h
+++ b/include/asm-frv/mb93091-fpga-irqs.h
@@ -12,11 +12,9 @@
#ifndef _ASM_MB93091_FPGA_IRQS_H
#define _ASM_MB93091_FPGA_IRQS_H
-#ifndef __ASSEMBLY__
-
-#include <asm/irq-routing.h>
+#include <asm/irq.h>
-#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1)
+#ifndef __ASSEMBLY__
/* IRQ IDs presented to drivers */
enum {
diff --git a/include/asm-frv/mb93093-fpga-irqs.h b/include/asm-frv/mb93093-fpga-irqs.h
index 1e0f11c2fcdb..590266b1a6d3 100644
--- a/include/asm-frv/mb93093-fpga-irqs.h
+++ b/include/asm-frv/mb93093-fpga-irqs.h
@@ -12,11 +12,9 @@
#ifndef _ASM_MB93093_FPGA_IRQS_H
#define _ASM_MB93093_FPGA_IRQS_H
-#ifndef __ASSEMBLY__
-
-#include <asm/irq-routing.h>
+#include <asm/irq.h>
-#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1)
+#ifndef __ASSEMBLY__
/* IRQ IDs presented to drivers */
enum {
diff --git a/include/asm-frv/mb93493-irqs.h b/include/asm-frv/mb93493-irqs.h
index 15096e731325..82c7aeddd333 100644
--- a/include/asm-frv/mb93493-irqs.h
+++ b/include/asm-frv/mb93493-irqs.h
@@ -12,11 +12,9 @@
#ifndef _ASM_MB93493_IRQS_H
#define _ASM_MB93493_IRQS_H
-#ifndef __ASSEMBLY__
-
-#include <asm/irq-routing.h>
+#include <asm/irq.h>
-#define IRQ_BASE_MB93493 (NR_IRQ_ACTIONS_PER_GROUP * 2)
+#ifndef __ASSEMBLY__
/* IRQ IDs presented to drivers */
enum {
diff --git a/include/asm-frv/mb93493-regs.h b/include/asm-frv/mb93493-regs.h
index c54aa9d14468..8a1f6aac8cf1 100644
--- a/include/asm-frv/mb93493-regs.h
+++ b/include/asm-frv/mb93493-regs.h
@@ -15,6 +15,7 @@
#include <asm/mb-regs.h>
#include <asm/mb93493-irqs.h>
+#define __addr_MB93493(X) ((volatile unsigned long *)(__region_CS3 + (X)))
#define __get_MB93493(X) ({ *(volatile unsigned long *)(__region_CS3 + (X)); })
#define __set_MB93493(X,V) \
@@ -26,6 +27,7 @@ do { \
#define __set_MB93493_STSR(X,V) __set_MB93493(0x3c0 + (X) * 4, (V))
#define MB93493_STSR_EN
+#define __addr_MB93493_IQSR(X) __addr_MB93493(0x3d0 + (X) * 4)
#define __get_MB93493_IQSR(X) __get_MB93493(0x3d0 + (X) * 4)
#define __set_MB93493_IQSR(X,V) __set_MB93493(0x3d0 + (X) * 4, (V))
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 7af7485e889e..2fb3c6f05e03 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -217,7 +217,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
}
#define pgd_page(pgd) (pud_page((pud_t){ pgd }))
-#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd }))
+#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd }))
/*
* allocating and freeing a pud is trivial: the 1-entry pud is
@@ -246,7 +246,7 @@ static inline void pud_clear(pud_t *pud) { }
#define set_pud(pudptr, pudval) set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval })
#define pud_page(pud) (pmd_page((pmd_t){ pud }))
-#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud }))
+#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud }))
/*
* (pmds are folded into pgds so this doesn't get actually called,
@@ -362,7 +362,7 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
#define pmd_bad(x) (pmd_val(x) & xAMPRx_SS)
#define pmd_clear(xp) do { __set_pmd(xp, 0); } while(0)
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#ifndef CONFIG_DISCONTIGMEM
@@ -458,7 +458,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index 68c6fea994d9..7b88d3931e34 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -21,6 +21,10 @@
#define pud_present(pud) 1
#define pud_ERROR(pud) do { } while (0)
#define pud_clear(pud) pgd_clear(pud)
+#define pud_val(pud) pgd_val(pud)
+#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd)
+#define pud_page(pud) pgd_page(pud)
+#define pud_page_vaddr(pud) pgd_page_vaddr(pud)
#undef pud_free_tlb
#define pud_free_tlb(tlb, x) do { } while (0)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index e160e04290fb..6d45ee5472af 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -14,7 +14,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu) (*({ \
+ extern int simple_indentifier_##var(void); \
+ RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index c8d53ba20e19..29ff5d84d8c3 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -47,7 +47,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
#define __pmd(x) ((pmd_t) { __pud(x) } )
#define pud_page(pud) (pmd_page((pmd_t){ pud }))
-#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud }))
+#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud }))
/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index 82e29f0ce467..566464500558 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -44,7 +44,7 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
#define __pud(x) ((pud_t) { __pgd(x) } )
#define pgd_page(pgd) (pud_page((pud_t){ pgd }))
-#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd }))
+#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd }))
/*
* allocating and freeing a pud is trivial: the 1-entry pud is
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index c2059a3a0621..349260cd86ed 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1,6 +1,8 @@
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H
+#ifndef __ASSEMBLY__
+
#ifndef __HAVE_ARCH_PTEP_ESTABLISH
/*
* Establish a new mapping:
@@ -188,7 +190,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
})
#endif
-#ifndef __ASSEMBLY__
/*
* When walking page tables, we usually want to skip any p?d_none entries;
* and any p?d_bad entries - reporting the error before resetting to none.
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db5a3732f106..253ae1328271 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -194,3 +194,6 @@
.stab.index 0 : { *(.stab.index) } \
.stab.indexstr 0 : { *(.stab.indexstr) } \
.comment 0 : { *(.comment) }
+
+#define NOTES \
+ .notes : { *(.note.*) } :note
diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
index b75a348d0c1c..147e4ac1ebf0 100644
--- a/include/asm-i386/Kbuild
+++ b/include/asm-i386/Kbuild
@@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
header-y += boot.h
header-y += debugreg.h
header-y += ldt.h
+header-y += ptrace-abi.h
header-y += ucontext.h
unifdef-y += mtrr.h
diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h
index 9cf20cacf76e..576ae01d71c8 100644
--- a/include/asm-i386/dma-mapping.h
+++ b/include/asm-i386/dma-mapping.h
@@ -21,8 +21,7 @@ static inline dma_addr_t
dma_map_single(struct device *dev, void *ptr, size_t size,
enum dma_data_direction direction)
{
- if (direction == DMA_NONE)
- BUG();
+ BUG_ON(direction == DMA_NONE);
WARN_ON(size == 0);
flush_write_buffers();
return virt_to_phys(ptr);
@@ -32,8 +31,7 @@ static inline void
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction direction)
{
- if (direction == DMA_NONE)
- BUG();
+ BUG_ON(direction == DMA_NONE);
}
static inline int
@@ -42,8 +40,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
{
int i;
- if (direction == DMA_NONE)
- BUG();
+ BUG_ON(direction == DMA_NONE);
WARN_ON(nents == 0 || sg[0].length == 0);
for (i = 0; i < nents; i++ ) {
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index a48cc3f7ccc6..02428cb36621 100644
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -19,7 +19,11 @@
* Leave one empty page between vmalloc'ed areas and
* the start of the fixmap.
*/
-#define __FIXADDR_TOP 0xfffff000
+#ifndef CONFIG_COMPAT_VDSO
+extern unsigned long __FIXADDR_TOP;
+#else
+#define __FIXADDR_TOP 0xfffff000
+#endif
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
@@ -93,6 +97,7 @@ enum fixed_addresses {
extern void __set_fixmap (enum fixed_addresses idx,
unsigned long phys, pgprot_t flags);
+extern void reserve_top_address(unsigned long reserve);
#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 22cb07cc8f32..61b073322006 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void)
}
extern int early_pfn_to_nid(unsigned long pfn);
+extern void numa_kva_reserve(void);
#else /* !CONFIG_NUMA */
+
#define get_memcfg_numa get_memcfg_numa_flat
#define get_zholes_size(n) (0)
+
+static inline void numa_kva_reserve(void)
+{
+}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 2756d4b04c27..201c86a6711e 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -21,8 +21,9 @@
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
-#define pte_same(a, b) ((a).pte_low == (b).pte_low)
+
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pte_none(x) (!(x).pte_low)
#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index dccb1b3337ad..0d899173232e 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -77,7 +77,7 @@ static inline void pud_clear (pud_t * pud) { }
#define pud_page(pud) \
((struct page *) __va(pud_val(pud) & PAGE_MASK))
-#define pud_page_kernel(pud) \
+#define pud_page_vaddr(pud) \
((unsigned long) __va(pud_val(pud) & PAGE_MASK))
@@ -105,6 +105,7 @@ static inline void pmd_clear(pmd_t *pmd)
*(tmp + 1) = 0;
}
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_t res;
@@ -117,6 +118,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return res;
}
+#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t a, pte_t b)
{
return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 09697fec3d2b..0dc051a8078b 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -246,6 +246,23 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p
# include <asm/pgtable-2level.h>
#endif
+/*
+ * We only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPUs that might be updating the dirty
+ * bit at the same time.
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+do { \
+ if (dirty) { \
+ (ptep)->pte_low = (entry).pte_low; \
+ flush_tlb_page(vma, address); \
+ } \
+} while (0)
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
if (!pte_dirty(*ptep))
@@ -253,6 +270,7 @@ static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned
return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
if (!pte_young(*ptep))
@@ -260,6 +278,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
}
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
{
pte_t pte;
@@ -272,6 +291,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
return pte;
}
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
@@ -364,11 +384,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
/*
@@ -411,23 +431,8 @@ extern void noexec_setup(const char *str);
/*
* The i386 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
- *
- * Also, we only update the dirty/accessed state if we set
- * the dirty bit by hand in the kernel, since the hardware
- * will do the accessed bit for us, and we don't want to
- * race with other CPU's that might be updating the dirty
- * bit at the same time.
*/
#define update_mmu_cache(vma,address,pte) do { } while (0)
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
- do { \
- if (__dirty) { \
- (__ptep)->pte_low = (__entry).pte_low; \
- flush_tlb_page(__vma, __address); \
- } \
- } while (0)
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_FLATMEM
@@ -441,12 +446,6 @@ extern void noexec_setup(const char *str);
#define GET_IOSPACE(pfn) 0
#define GET_PFN(pfn) (pfn)
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>
#endif /* _I386_PGTABLE_H */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index b32346d62e10..2277127696d2 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -143,6 +143,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {}
#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
/*
* Generic CPUID function
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
@@ -150,24 +162,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {}
*/
static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op), "c"(0));
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
}
/* Some CPUID calls want 'count' to be placed in ecx */
static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
- int *edx)
+ int *edx)
{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op), "c" (count));
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
}
/*
@@ -175,42 +181,30 @@ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
*/
static inline unsigned int cpuid_eax(unsigned int op)
{
- unsigned int eax;
+ unsigned int eax, ebx, ecx, edx;
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (op)
- : "bx", "cx", "dx");
+ cpuid(op, &eax, &ebx, &ecx, &edx);
return eax;
}
static inline unsigned int cpuid_ebx(unsigned int op)
{
- unsigned int eax, ebx;
+ unsigned int eax, ebx, ecx, edx;
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx)
- : "0" (op)
- : "cx", "dx" );
+ cpuid(op, &eax, &ebx, &ecx, &edx);
return ebx;
}
static inline unsigned int cpuid_ecx(unsigned int op)
{
- unsigned int eax, ecx;
+ unsigned int eax, ebx, ecx, edx;
- __asm__("cpuid"
- : "=a" (eax), "=c" (ecx)
- : "0" (op)
- : "bx", "dx" );
+ cpuid(op, &eax, &ebx, &ecx, &edx);
return ecx;
}
static inline unsigned int cpuid_edx(unsigned int op)
{
- unsigned int eax, edx;
+ unsigned int eax, ebx, ecx, edx;
- __asm__("cpuid"
- : "=a" (eax), "=d" (edx)
- : "0" (op)
- : "bx", "cx");
+ cpuid(op, &eax, &ebx, &ecx, &edx);
return edx;
}
diff --git a/include/asm-i386/ptrace-abi.h b/include/asm-i386/ptrace-abi.h
new file mode 100644
index 000000000000..a44901817a26
--- /dev/null
+++ b/include/asm-i386/ptrace-abi.h
@@ -0,0 +1,39 @@
+#ifndef I386_PTRACE_ABI_H
+#define I386_PTRACE_ABI_H
+
+#define EBX 0
+#define ECX 1
+#define EDX 2
+#define ESI 3
+#define EDI 4
+#define EBP 5
+#define EAX 6
+#define DS 7
+#define ES 8
+#define FS 9
+#define GS 10
+#define ORIG_EAX 11
+#define EIP 12
+#define CS 13
+#define EFL 14
+#define UESP 15
+#define SS 16
+#define FRAME_SIZE 17
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
+
+#define PTRACE_OLDSETOPTIONS 21
+
+#define PTRACE_GET_THREAD_AREA 25
+#define PTRACE_SET_THREAD_AREA 26
+
+#define PTRACE_SYSEMU 31
+#define PTRACE_SYSEMU_SINGLESTEP 32
+
+#endif
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index f324c53b6f9a..1910880fcd40 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -1,24 +1,7 @@
#ifndef _I386_PTRACE_H
#define _I386_PTRACE_H
-#define EBX 0
-#define ECX 1
-#define EDX 2
-#define ESI 3
-#define EDI 4
-#define EBP 5
-#define EAX 6
-#define DS 7
-#define ES 8
-#define FS 9
-#define GS 10
-#define ORIG_EAX 11
-#define EIP 12
-#define CS 13
-#define EFL 14
-#define UESP 15
-#define SS 16
-#define FRAME_SIZE 17
+#include <asm/ptrace-abi.h>
/* this struct defines the way the registers are stored on the
stack during a system call. */
@@ -41,22 +24,6 @@ struct pt_regs {
int xss;
};
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_GETFPXREGS 18
-#define PTRACE_SETFPXREGS 19
-
-#define PTRACE_OLDSETOPTIONS 21
-
-#define PTRACE_GET_THREAD_AREA 25
-#define PTRACE_SET_THREAD_AREA 26
-
-#define PTRACE_SYSEMU 31
-#define PTRACE_SYSEMU_SINGLESTEP 32
-
#ifdef __KERNEL__
#include <asm/vm86.h>
diff --git a/include/asm-i386/sync_bitops.h b/include/asm-i386/sync_bitops.h
new file mode 100644
index 000000000000..c94d51c993ee
--- /dev/null
+++ b/include/asm-i386/sync_bitops.h
@@ -0,0 +1,156 @@
+#ifndef _I386_SYNC_BITOPS_H
+#define _I386_SYNC_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * sync_set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non-x86 architectures, so if you are writing portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void sync_set_bit(int nr, volatile unsigned long * addr)
+{
+ __asm__ __volatile__("lock; btsl %1,%0"
+ :"+m" (ADDR)
+ :"Ir" (nr)
+ : "memory");
+}
+
+/**
+ * sync_clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * sync_clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void sync_clear_bit(int nr, volatile unsigned long * addr)
+{
+ __asm__ __volatile__("lock; btrl %1,%0"
+ :"+m" (ADDR)
+ :"Ir" (nr)
+ : "memory");
+}
+
+/**
+ * sync_change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * sync_change_bit() is atomic and may not be reordered. It may be
+ * reordered on other architectures than x86.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void sync_change_bit(int nr, volatile unsigned long * addr)
+{
+ __asm__ __volatile__("lock; btcl %1,%0"
+ :"+m" (ADDR)
+ :"Ir" (nr)
+ : "memory");
+}
+
+/**
+ * sync_test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It may be reordered on other architectures than x86.
+ * It also implies a memory barrier.
+ */
+static inline int sync_test_and_set_bit(int nr, volatile unsigned long * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__("lock; btsl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"+m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * sync_test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It can be reordered on architectures other than x86.
+ * It also implies a memory barrier.
+ */
+static inline int sync_test_and_clear_bit(int nr, volatile unsigned long * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__("lock; btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"+m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * sync_test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int sync_test_and_change_bit(int nr, volatile unsigned long* addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__("lock; btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"+m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+static __always_inline int sync_const_test_bit(int nr, const volatile unsigned long *addr)
+{
+ return ((1UL << (nr & 31)) &
+ (((const volatile unsigned int *)addr)[nr >> 5])) != 0;
+}
+
+static inline int sync_var_test_bit(int nr, const volatile unsigned long * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit)
+ :"m" (ADDR),"Ir" (nr));
+ return oldbit;
+}
+
+#define sync_test_bit(nr,addr) /* nonzero iff bit nr is set */ \
+ (__builtin_constant_p(nr) ? /* constant nr: C shift-and-mask */ \
+ sync_const_test_bit((nr),(addr)) : \
+ sync_var_test_bit((nr),(addr))) /* variable nr: btl instruction */
+
+#undef ADDR
+
+#endif /* _I386_SYNC_BITOPS_H */
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 098bcee94e38..a6dabbcd6e6a 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
#define cmpxchg(ptr,o,n)\
((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
(unsigned long)(n),sizeof(*(ptr))))
+#define sync_cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
#endif
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
return old;
}
+/*
+ * Always use locked operations when touching memory shared with a
+ * hypervisor, since the system may be SMP even if the guest kernel
+ * isn't.
+ */
+static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__("lock; cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
#ifndef CONFIG_X86_CMPXCHG
/*
* Building a kernel capable running on 80386. It may be necessary to
diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h
index e5a8260593a5..e0a1d173e42d 100644
--- a/include/asm-ia64/numa.h
+++ b/include/asm-ia64/numa.h
@@ -64,6 +64,10 @@ extern int paddr_to_nid(unsigned long paddr);
#define local_nodeid (cpu_to_node_map[smp_processor_id()])
+extern void map_cpu_to_node(int cpu, int nid);
+extern void unmap_cpu_from_node(int cpu, int nid);
+
+
#else /* !CONFIG_NUMA */
#define paddr_to_nid(addr) 0
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index 228981cadf8f..553182747722 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -275,21 +275,23 @@ ia64_phys_addr_valid (unsigned long addr)
#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd)))
#define pmd_present(pmd) (pmd_val(pmd) != 0UL)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud)))
#define pud_present(pud) (pud_val(pud) != 0UL)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET))
#ifdef CONFIG_PGTABLE_4
#define pgd_none(pgd) (!pgd_val(pgd))
#define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd)))
#define pgd_present(pgd) (pgd_val(pgd) != 0UL)
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
-#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
+#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
+#define pgd_page(pgd) virt_to_page((pgd_val(pgd) + PAGE_OFFSET))
#endif
/*
@@ -360,19 +362,19 @@ pgd_offset (struct mm_struct *mm, unsigned long address)
#ifdef CONFIG_PGTABLE_4
/* Find an entry in the second-level page table.. */
#define pud_offset(dir,addr) \
- ((pud_t *) pgd_page(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+ ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
#endif
/* Find an entry in the third-level page table.. */
#define pmd_offset(dir,addr) \
- ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+ ((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
/*
* Find an entry in the third-level page table. This looks more complicated than it
* should be because some platforms place page tables in high memory.
*/
#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr)
#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr)
#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 719ff309ce09..74bde1c2bb1a 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -122,8 +122,6 @@ extern void __init smp_build_cpu_map(void);
extern void __init init_smp_config (void);
extern void smp_do_timer (struct pt_regs *regs);
-extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info,
- int retry, int wait);
extern void smp_send_reschedule (int cpu);
extern void lock_ipi_calllock(void);
extern void unlock_ipi_calllock(void);
diff --git a/include/asm-m32r/pgtable-2level.h b/include/asm-m32r/pgtable-2level.h
index be0f167e344a..6a674e3d37a2 100644
--- a/include/asm-m32r/pgtable-2level.h
+++ b/include/asm-m32r/pgtable-2level.h
@@ -52,9 +52,13 @@ static inline int pgd_present(pgd_t pgd) { return 1; }
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval)
-#define pgd_page(pgd) \
+#define pgd_page_vaddr(pgd) \
((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+#ifndef CONFIG_DISCONTIGMEM
+#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) >> PAGE_SHIFT) - PFN_BASE))
+#endif /* !CONFIG_DISCONTIGMEM */
+
static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
{
return (pmd_t *) dir;
diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h
index 1983b7f4527a..1c15ba7ce319 100644
--- a/include/asm-m32r/pgtable.h
+++ b/include/asm-m32r/pgtable.h
@@ -336,7 +336,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
pmd_val(*pmdp) = (((unsigned long) ptep) & PAGE_MASK);
}
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#ifndef CONFIG_DISCONTIGMEM
@@ -358,7 +358,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
- ((pte_t *)pmd_page_kernel(*(dir)) + pte_index(address))
+ ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index(address))
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h
index 1ccc7338a54b..61e4406ed96a 100644
--- a/include/asm-m68k/motorola_pgtable.h
+++ b/include/asm-m68k/motorola_pgtable.h
@@ -150,6 +150,7 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
#define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE)
#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE)
#define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; })
+#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT))
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h
index d5b38a247e5a..eeb0c3115b6a 100644
--- a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h
+++ b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h
@@ -316,7 +316,7 @@ typedef struct dbdma_chan_config {
au1x_ddma_desc_t *chan_desc_base;
au1x_ddma_desc_t *get_ptr, *put_ptr, *cur_ptr;
void *chan_callparam;
- void (*chan_callback)(int, void *, struct pt_regs *);
+ void (*chan_callback)(int, void *);
} chan_tab_t;
#define DEV_FLAGS_INUSE (1 << 0)
@@ -334,8 +334,8 @@ typedef struct dbdma_chan_config {
* meaningful name. The 'callback' is called during dma completion
* interrupt.
*/
-u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
- void (*callback)(int, void *, struct pt_regs *), void *callparam);
+extern u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
+ void (*callback)(int, void *), void *callparam);
#define DBDMA_MEM_CHAN DSCR_CMD0_ALWAYS
diff --git a/include/asm-mips/pgtable-32.h b/include/asm-mips/pgtable-32.h
index 4b26d8528133..d20f2e9b28be 100644
--- a/include/asm-mips/pgtable-32.h
+++ b/include/asm-mips/pgtable-32.h
@@ -156,9 +156,9 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
#define __pte_offset(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset(dir, address) \
- ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address))
+ ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address))
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
diff --git a/include/asm-mips/pgtable-64.h b/include/asm-mips/pgtable-64.h
index e3db93212eab..c59a1e21f5b0 100644
--- a/include/asm-mips/pgtable-64.h
+++ b/include/asm-mips/pgtable-64.h
@@ -178,24 +178,26 @@ static inline void pud_clear(pud_t *pudp)
/* to find an entry in a page-table-directory */
#define pgd_offset(mm,addr) ((mm)->pgd + pgd_index(addr))
-static inline unsigned long pud_page(pud_t pud)
+static inline unsigned long pud_page_vaddr(pud_t pud)
{
return pud_val(pud);
}
+#define pud_phys(pud) (pud_val(pud) - PAGE_OFFSET)
+#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT))
/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address)
{
- return (pmd_t *) pud_page(*pud) + pmd_index(address);
+ return (pmd_t *) pud_page_vaddr(*pud) + pmd_index(address);
}
/* Find an entry in the third-level page table.. */
#define __pte_offset(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset(dir, address) \
- ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address))
+ ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address))
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
#define pte_offset_map_nested(dir, address) \
diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h
index a36ca1be17f2..1ca4d1e185c7 100644
--- a/include/asm-mips/pgtable.h
+++ b/include/asm-mips/pgtable.h
@@ -87,7 +87,7 @@ extern void paging_init(void);
*/
#define pmd_phys(pmd) (pmd_val(pmd) - PAGE_OFFSET)
#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
-#define pmd_page_kernel(pmd) pmd_val(pmd)
+#define pmd_page_vaddr(pmd) pmd_val(pmd)
#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1)
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
index 5066c54dae0a..c0b61e0d1497 100644
--- a/include/asm-parisc/pgtable.h
+++ b/include/asm-parisc/pgtable.h
@@ -303,7 +303,8 @@ static inline void pmd_clear(pmd_t *pmd) {
#if PT_NLEVELS == 3
-#define pgd_page(pgd) ((unsigned long) __va(pgd_address(pgd)))
+#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
+#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd))
/* For 64 bit we have three level tables */
@@ -382,7 +383,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_address(pmd)))
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_address(pmd)))
#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd)))
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
@@ -400,7 +401,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#if PT_NLEVELS == 3
#define pmd_offset(dir,address) \
-((pmd_t *) pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
+((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
#else
#define pmd_offset(dir,addr) ((pmd_t *) dir)
#endif
@@ -408,7 +409,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
/* Find an entry in the third-level page table.. */
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
#define pte_offset_kernel(pmd, address) \
- ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address))
+ ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h
index e7036155672e..345d9b07b3e2 100644
--- a/include/asm-powerpc/pgtable-4k.h
+++ b/include/asm-powerpc/pgtable-4k.h
@@ -88,10 +88,11 @@
#define pgd_bad(pgd) (pgd_val(pgd) == 0)
#define pgd_present(pgd) (pgd_val(pgd) != 0)
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
-#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
+#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
+#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd))
#define pud_offset(pgdp, addr) \
- (((pud_t *) pgd_page(*(pgdp))) + \
+ (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
#define pud_ERROR(e) \
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index 8dbf5ad8150f..10f52743f4ff 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -196,8 +196,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
|| (pmd_val(pmd) & PMD_BAD_BITS))
#define pmd_present(pmd) (pmd_val(pmd) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
-#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
-#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
+#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
#define pud_none(pud) (!pud_val(pud))
@@ -205,7 +205,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
|| (pud_val(pud) & PUD_BAD_BITS))
#define pud_present(pud) (pud_val(pud) != 0)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
-#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
+#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
+#define pud_page(pud) virt_to_page(pud_page_vaddr(pud))
#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
@@ -219,10 +220,10 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
#define pmd_offset(pudp,addr) \
- (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+ (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
#define pte_offset_kernel(dir,addr) \
- (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+ (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h
index 51fa7c662917..b1fdbf40dba2 100644
--- a/include/asm-ppc/pgtable.h
+++ b/include/asm-ppc/pgtable.h
@@ -526,7 +526,7 @@ static inline int pgd_bad(pgd_t pgd) { return 0; }
static inline int pgd_present(pgd_t pgd) { return 1; }
#define pgd_clear(xp) do { } while (0)
-#define pgd_page(pgd) \
+#define pgd_page_vaddr(pgd) \
((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
/*
@@ -720,12 +720,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
* of the pte page. -- paulus
*/
#ifndef CONFIG_BOOKE
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) \
(mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
#else
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) (pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) \
(mem_map + (__pa(pmd_val(pmd)) >> PAGE_SHIFT))
@@ -748,7 +748,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, addr) \
- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
#define pte_offset_map(dir, addr) \
((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr))
#define pte_offset_map_nested(dir, addr) \
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index 28b3517e787c..495ad99c7635 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -15,18 +15,20 @@
*/
#if defined(__s390x__) && defined(MODULE)
-#define __reloc_hide(var,offset) \
- (*({ unsigned long *__ptr; \
- asm ( "larl %0,per_cpu__"#var"@GOTENT" \
- : "=a" (__ptr) : "X" (per_cpu__##var) ); \
- (typeof(&per_cpu__##var))((*__ptr) + (offset)); }))
+#define __reloc_hide(var,offset) (*({ \
+ extern int simple_indentifier_##var(void); \
+ unsigned long *__ptr; \
+ asm ( "larl %0,per_cpu__"#var"@GOTENT" \
+ : "=a" (__ptr) : "X" (per_cpu__##var) ); \
+ (typeof(&per_cpu__##var))((*__ptr) + (offset)); }))
#else
-#define __reloc_hide(var, offset) \
- (*({ unsigned long __ptr; \
- asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \
- (typeof(&per_cpu__##var)) (__ptr + (offset)); }))
+#define __reloc_hide(var, offset) (*({ \
+ extern int simple_indentifier_##var(void); \
+ unsigned long __ptr; \
+ asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \
+ (typeof(&per_cpu__##var)) (__ptr + (offset)); }))
#endif
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 1a07028d575e..e965309fedac 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -664,11 +664,13 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_page_kernel(pmd) (pmd_val(pmd) & PAGE_MASK)
+#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
#define pmd_page(pmd) (mem_map+(pmd_val(pmd) >> PAGE_SHIFT))
-#define pgd_page_kernel(pgd) (pgd_val(pgd) & PAGE_MASK)
+#define pgd_page_vaddr(pgd) (pgd_val(pgd) & PAGE_MASK)
+
+#define pgd_page(pgd) (mem_map+(pgd_val(pgd) >> PAGE_SHIFT))
/* to find an entry in a page-table-directory */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -690,14 +692,14 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
/* Find an entry in the second-level page table.. */
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pmd_offset(dir,addr) \
- ((pmd_t *) pgd_page_kernel(*(dir)) + pmd_index(addr))
+ ((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(addr))
#endif /* __s390x__ */
/* Find an entry in the third-level page table.. */
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
#define pte_offset_kernel(pmd, address) \
- ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address))
+ ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index a3a4e5fd30d7..578c2209fa76 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -337,6 +337,8 @@ struct notifier_block;
int register_idle_notifier(struct notifier_block *nb);
int unregister_idle_notifier(struct notifier_block *nb);
+#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
+
#endif
/*
diff --git a/include/asm-sh/pgtable-2level.h b/include/asm-sh/pgtable-2level.h
index b0528aa3cb1f..b525db6f61c6 100644
--- a/include/asm-sh/pgtable-2level.h
+++ b/include/asm-sh/pgtable-2level.h
@@ -50,9 +50,12 @@ static inline void pgd_clear (pgd_t * pgdp) { }
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval)
-#define pgd_page(pgd) \
+#define pgd_page_vaddr(pgd) \
((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+#define pgd_page(pgd) \
+ (phys_to_page(pgd_val(pgd)))
+
static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
{
return (pmd_t *) dir;
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h
index dcd23a03683d..40d41a78041e 100644
--- a/include/asm-sh/pgtable.h
+++ b/include/asm-sh/pgtable.h
@@ -225,7 +225,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{ set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; }
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) \
@@ -242,7 +242,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pte_index(address) \
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address)
#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h
index 54c7821893f5..6b97c4cb1d64 100644
--- a/include/asm-sh64/pgtable.h
+++ b/include/asm-sh64/pgtable.h
@@ -190,7 +190,9 @@ static inline int pgd_bad(pgd_t pgd) { return 0; }
#endif
-#define pgd_page(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK))
+#define pgd_page_vaddr(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK))
+#define pgd_page(pgd) (virt_to_page(pgd_val(pgd)))
+
/*
* PMD defines. Middle level.
@@ -219,7 +221,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
#define pmd_none(pmd_entry) (pmd_val((pmd_entry)) == _PMD_EMPTY)
#define pmd_bad(pmd_entry) ((pmd_val(pmd_entry) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
-#define pmd_page_kernel(pmd_entry) \
+#define pmd_page_vaddr(pmd_entry) \
((unsigned long) __va(pmd_val(pmd_entry) & PAGE_MASK))
#define pmd_page(pmd) \
diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h
index 226c6475c9a2..4f0a5ba0d6a0 100644
--- a/include/asm-sparc/pgtable.h
+++ b/include/asm-sparc/pgtable.h
@@ -143,10 +143,10 @@ extern unsigned long empty_zero_page;
/*
*/
BTFIXUPDEF_CALL_CONST(struct page *, pmd_page, pmd_t)
-BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page, pgd_t)
+BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page_vaddr, pgd_t)
#define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd)
-#define pgd_page(pgd) BTFIXUP_CALL(pgd_page)(pgd)
+#define pgd_page_vaddr(pgd) BTFIXUP_CALL(pgd_page_vaddr)(pgd)
BTFIXUPDEF_SETHI(none_mask)
BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index ebfe395cfb87..b12be7a869f6 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -630,8 +630,9 @@ static inline unsigned long pte_present(pte_t pte)
#define __pmd_page(pmd) \
((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL)))
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
-#define pud_page(pud) \
+#define pud_page_vaddr(pud) \
((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
+#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
#define pmd_present(pmd) (pmd_val(pmd) != 0U)
@@ -653,7 +654,7 @@ static inline unsigned long pte_present(pte_t pte)
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
- ((pmd_t *) pud_page(*(pudp)) + \
+ ((pmd_t *) pud_page_vaddr(*(pudp)) + \
(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)))
/* Find an entry in the third-level page table.. */
diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h
index ffe017f6b64b..6050e0eb257e 100644
--- a/include/asm-um/pgtable-2level.h
+++ b/include/asm-um/pgtable-2level.h
@@ -41,7 +41,7 @@ static inline void pgd_mkuptodate(pgd_t pgd) { }
#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
-#define pmd_page_kernel(pmd) \
+#define pmd_page_vaddr(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
/*
diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h
index 786c25727289..ca0c2a92a112 100644
--- a/include/asm-um/pgtable-3level.h
+++ b/include/asm-um/pgtable-3level.h
@@ -74,11 +74,12 @@ extern inline void pud_clear (pud_t *pud)
set_pud(pud, __pud(0));
}
-#define pud_page(pud) \
+#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
+#define pud_page_vaddr(pud) \
((struct page *) __va(pud_val(pud) & PAGE_MASK))
/* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
+#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \
pmd_index(address))
static inline unsigned long pte_pfn(pte_t pte)
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index ac64eb955868..4862daf8b906 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -349,7 +349,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return pte;
}
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
/*
* the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
@@ -389,7 +389,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
index 824c28896382..afa4fe1ca9f1 100644
--- a/include/asm-um/processor-generic.h
+++ b/include/asm-um/processor-generic.h
@@ -138,9 +138,7 @@ extern struct cpuinfo_um cpu_data[];
#ifdef CONFIG_MODE_SKAS
#define KSTK_REG(tsk, reg) \
- ({ union uml_pt_regs regs; \
- get_thread_regs(&regs, tsk->thread.mode.skas.switch_buf); \
- UPT_REG(&regs, reg); })
+ get_thread_reg(reg, tsk->thread.mode.skas.switch_buf)
#else
#define KSTK_REG(tsk, reg) (0xbadbabe)
#endif
diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h
index a36f5371b36b..99c87c5ce994 100644
--- a/include/asm-um/ptrace-generic.h
+++ b/include/asm-um/ptrace-generic.h
@@ -8,19 +8,7 @@
#ifndef __ASSEMBLY__
-
-#define pt_regs pt_regs_subarch
-#define show_regs show_regs_subarch
-#define send_sigtrap send_sigtrap_subarch
-
-#include "asm/arch/ptrace.h"
-
-#undef pt_regs
-#undef show_regs
-#undef send_sigtrap
-#undef user_mode
-#undef instruction_pointer
-
+#include "asm/arch/ptrace-abi.h"
#include "sysdep/ptrace.h"
struct pt_regs {
diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h
index c894e68b1f96..2074483e6ca4 100644
--- a/include/asm-um/ptrace-x86_64.h
+++ b/include/asm-um/ptrace-x86_64.h
@@ -11,15 +11,11 @@
#include "asm/errno.h"
#include "asm/host_ldt.h"
-#define signal_fault signal_fault_x86_64
#define __FRAME_OFFSETS /* Needed to get the R* macros */
#include "asm/ptrace-generic.h"
-#undef signal_fault
#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-void signal_fault(struct pt_regs_subarch *regs, void *frame, char *where);
-
#define FS_BASE (21 * sizeof(unsigned long))
#define GS_BASE (22 * sizeof(unsigned long))
#define DS (23 * sizeof(unsigned long))
diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
index 40f2f13fe174..1ee9b07f3fe6 100644
--- a/include/asm-x86_64/Kbuild
+++ b/include/asm-x86_64/Kbuild
@@ -11,6 +11,7 @@ header-y += debugreg.h
header-y += ldt.h
header-y += msr.h
header-y += prctl.h
+header-y += ptrace-abi.h
header-y += setup.h
header-y += sigcontext32.h
header-y += ucontext.h
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index 670a3388e70a..f65674832318 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -46,6 +46,7 @@ extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(void);
+extern void e820_mark_nosave_regions(void);
extern void e820_print_map(char *who);
extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 08dd9f9dda81..bffb2f886a51 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -21,9 +21,15 @@
__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
-#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
-#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
+#define per_cpu(var, cpu) (*({ \
+ extern int simple_indentifier_##var(void); \
+ RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); }))
+#define __get_cpu_var(var) (*({ \
+ extern int simple_indentifier_##var(void); \
+ RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
+#define __raw_get_cpu_var(var) (*({ \
+ extern int simple_indentifier_##var(void); \
+ RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
/* A macro to avoid #include hell... */
#define percpu_modcopy(pcpudst, src, size) \
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index a31ab4e68a9b..51eba2395171 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -101,9 +101,6 @@ static inline void pgd_clear (pgd_t * pgd)
set_pgd(pgd, __pgd(0));
}
-#define pud_page(pud) \
-((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
-
#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0))
struct mm_struct;
@@ -326,7 +323,8 @@ static inline int pmd_large(pmd_t pte) {
/*
* Level 4 access.
*/
-#define pgd_page(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK))
+#define pgd_page_vaddr(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK))
+#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
@@ -335,16 +333,18 @@ static inline int pmd_large(pmd_t pte) {
/* PUD - Level3 access */
/* to find an entry in a page-table-directory. */
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
+#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
+#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
/* PMD - Level 2 access */
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
+#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
pmd_index(address))
#define pmd_none(x) (!pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
@@ -382,7 +382,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pte_index(address) \
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
+#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
pte_index(address))
/* x86-64 always has all page tables mapped. */
diff --git a/include/asm-x86_64/ptrace-abi.h b/include/asm-x86_64/ptrace-abi.h
new file mode 100644
index 000000000000..19184b0806b1
--- /dev/null
+++ b/include/asm-x86_64/ptrace-abi.h
@@ -0,0 +1,51 @@
+#ifndef _X86_64_PTRACE_ABI_H
+#define _X86_64_PTRACE_ABI_H
+
+#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#define R15 0
+#define R14 8
+#define R13 16
+#define R12 24
+#define RBP 32
+#define RBX 40
+/* arguments: interrupts/non tracing syscalls only save up to here */
+#define R11 48
+#define R10 56
+#define R9 64
+#define R8 72
+#define RAX 80
+#define RCX 88
+#define RDX 96
+#define RSI 104
+#define RDI 112
+#define ORIG_RAX 120 /* = ERROR */
+/* end of arguments */
+/* cpu exception frame or undefined in case of fast syscall. */
+#define RIP 128
+#define CS 136
+#define EFLAGS 144
+#define RSP 152
+#define SS 160
+#define ARGOFFSET R11
+#endif /* __ASSEMBLY__ */
+
+/* top of stack page */
+#define FRAME_SIZE 168
+
+#define PTRACE_OLDSETOPTIONS 21
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
+
+/* only useful for accessing 32-bit programs */
+#define PTRACE_GET_THREAD_AREA 25
+#define PTRACE_SET_THREAD_AREA 26
+
+#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */
+
+#endif
diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h
index ca6f15ff61d4..ab827dc381d7 100644
--- a/include/asm-x86_64/ptrace.h
+++ b/include/asm-x86_64/ptrace.h
@@ -1,40 +1,9 @@
#ifndef _X86_64_PTRACE_H
#define _X86_64_PTRACE_H
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 32
-#define RBX 40
-/* arguments: interrupts/non tracing syscalls only save upto here*/
-#define R11 48
-#define R10 56
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120 /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-#define ARGOFFSET R11
-#endif /* __ASSEMBLY__ */
+#include <asm/ptrace-abi.h>
-/* top of stack page */
-#define FRAME_SIZE 168
-
-#define PTRACE_OLDSETOPTIONS 21
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLY__
struct pt_regs {
unsigned long r15;
@@ -45,7 +14,7 @@ struct pt_regs {
unsigned long rbx;
/* arguments: non interrupts/non tracing syscalls only save upto here*/
unsigned long r11;
- unsigned long r10;
+ unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rax;
@@ -54,32 +23,18 @@ struct pt_regs {
unsigned long rsi;
unsigned long rdi;
unsigned long orig_rax;
-/* end of arguments */
+/* end of arguments */
/* cpu exception frame or undefined */
unsigned long rip;
unsigned long cs;
- unsigned long eflags;
- unsigned long rsp;
+ unsigned long eflags;
+ unsigned long rsp;
unsigned long ss;
-/* top of stack page */
+/* top of stack page */
};
#endif
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_GETFPXREGS 18
-#define PTRACE_SETFPXREGS 19
-
-/* only useful for access 32bit programs */
-#define PTRACE_GET_THREAD_AREA 25
-#define PTRACE_SET_THREAD_AREA 26
-
-#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */
-
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
#define user_mode(regs) (!!((regs)->cs & 3))
#define user_mode_vm(regs) user_mode(regs)
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 6805e1feb300..ce97f65e1d10 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -48,8 +48,6 @@ extern void unlock_ipi_call_lock(void);
extern int smp_num_siblings;
extern void smp_send_reschedule(int cpu);
void smp_stop_cpu(void);
-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
- void *info, int retry, int wait);
extern cpumask_t cpu_sibling_map[NR_CPUS];
extern cpumask_t cpu_core_map[NR_CPUS];
diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h
index 7b15afb70c56..a47cc734c20c 100644
--- a/include/asm-xtensa/pgtable.h
+++ b/include/asm-xtensa/pgtable.h
@@ -218,7 +218,7 @@ extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)];
/*
* The pmd contains the kernel virtual address of the pte page.
*/
-#define pmd_page_kernel(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page_vaddr(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) virt_to_page(pmd_val(pmd))
/*
@@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/* Find an entry in the third-level page table.. */
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir,addr) \
- ((pte_t*) pmd_page_kernel(*(dir)) + pte_index(addr))
+ ((pte_t*) pmd_page_vaddr(*(dir)) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr))
#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir),(addr))
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c649e4fd..31e9abb6d977 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
#ifndef _LINUX_BOOTMEM_H
#define _LINUX_BOOTMEM_H
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <linux/cache.h>
-#include <linux/init.h>
#include <linux/mmzone.h>
+#include <asm/dma.h>
/*
* simple boot-time physical memory area allocator.
@@ -41,45 +38,64 @@ typedef struct bootmem_data {
struct list_head list;
} bootmem_data_t;
-extern unsigned long __init bootmem_bootmap_pages (unsigned long);
-extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
-extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_low(unsigned long size,
- unsigned long align,
- unsigned long goal);
-extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal);
-extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
- unsigned long size, unsigned long align, unsigned long goal,
- unsigned long limit);
+extern unsigned long bootmem_bootmap_pages(unsigned long);
+extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void *__alloc_bootmem(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_low(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal,
+ unsigned long limit);
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem(unsigned long addr, unsigned long size);
#define alloc_bootmem(x) \
- __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
- __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
+ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_pages(x) \
- __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
- __alloc_bootmem_low((x), PAGE_SIZE, 0)
+ __alloc_bootmem_low(x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long __init free_all_bootmem (void);
-extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+
+extern unsigned long free_all_bootmem(void);
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+ unsigned long freepfn,
+ unsigned long startpfn,
+ unsigned long endpfn);
+extern void reserve_bootmem_node(pg_data_t *pgdat,
+ unsigned long physaddr,
+ unsigned long size);
+extern void free_bootmem_node(pg_data_t *pgdat,
+ unsigned long addr,
+ unsigned long size);
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages_node(pgdat, x) \
- __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
+ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
@@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
{
return NULL;
}
-#endif
+#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
extern unsigned long __meminitdata nr_kernel_pages;
extern unsigned long nr_all_pages;
-extern void *__init alloc_large_system_hash(const char *tablename,
- unsigned long bucketsize,
- unsigned long numentries,
- int scale,
- int flags,
- unsigned int *_hash_shift,
- unsigned int *_hash_mask,
- unsigned long limit);
+extern void *alloc_large_system_hash(const char *tablename,
+ unsigned long bucketsize,
+ unsigned long numentries,
+ int scale,
+ int flags,
+ unsigned int *_hash_shift,
+ unsigned int *_hash_mask,
+ unsigned long limit);
#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */
#define HASH_EARLY 0x00000002 /* Allocating during early boot? */
diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf2155e565..76a1807726eb 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
extern void console_start(struct console *);
extern int is_console_locked(void);
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
/* Suspend and resume console messages over PM events */
extern void suspend_console(void);
extern void resume_console(void);
+#else
+static inline void suspend_console(void) {}
+static inline void resume_console(void) {}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
/* Some debug stub to catch some of the obvious races in the VT code */
#if 1
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a9abd8..3fef7d67aedc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
static inline int cpu_is_offline(int cpu) { return 0; }
#endif
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 2d7671c92c0b..d6f4ec467a4b 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -169,6 +169,12 @@ enum {
DCCPO_MAX_CCID_SPECIFIC = 255,
};
+/* DCCP CCIDS */
+enum {
+ DCCPC_CCID2 = 2,
+ DCCPC_CCID3 = 3,
+};
+
/* DCCP features */
enum {
DCCPF_RESERVED = 0,
@@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
/* initial values for each feature */
#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
#define DCCPF_INITIAL_ACK_RATIO 2
-#define DCCPF_INITIAL_CCID 2
+#define DCCPF_INITIAL_CCID DCCPC_CCID2
#define DCCPF_INITIAL_SEND_ACK_VECTOR 1
/* FIXME: for now we're default to 1 but it should really be 0 */
#define DCCPF_INITIAL_SEND_NDP_COUNT 1
@@ -404,6 +410,7 @@ struct dccp_service_list {
};
#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
+#define DCCP_SERVICE_CODE_IS_ABSENT 0
static inline int dccp_list_has_service(const struct dccp_service_list *sl,
const __be32 service)
@@ -484,11 +491,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
}
-static inline int dccp_service_not_initialized(const struct sock *sk)
-{
- return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
-}
-
static inline const char *dccp_role(const struct sock *sk)
{
switch (dccp_sk(sk)->dccps_role) {
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c81c90..666e0a5f00fc 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
#define EM_M32R 88 /* Renesas M32R */
#define EM_H8_300 46 /* Renesas H8/300,300H,H8S */
#define EM_FRV 0x5441 /* Fujitsu FR-V */
+#define EM_AVR32 0x18ad /* Atmel AVR32 */
/*
* This is an interim value that we will use until the committee comes
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 000000000000..67396db141e8
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,90 @@
+#ifndef _LINUX_ELFNOTE_H
+#define _LINUX_ELFNOTE_H
+/*
+ * Helper macros to generate ELF Note structures, which are put into a
+ * PT_NOTE segment of the final vmlinux image. These are useful for
+ * including name-value pairs of metadata into the kernel binary (or
+ * modules?) for use by external programs.
+ *
+ * Each note has three parts: a name, a type and a desc. The name is
+ * intended to distinguish the note's originator, so it would be a
+ * company, project, subsystem, etc; it must be in a suitable form for
+ * use in a section name. The type is an integer which is used to tag
+ * the data, and is considered to be within the "name" namespace (so
+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
+ * "desc" field is the actual data. There are no constraints on the
+ * desc field's contents, though typically they're fairly small.
+ *
+ * All notes from a given NAME are put into a section named
+ * .note.NAME. When the kernel image is finally linked, all the notes
+ * are packed into a single .notes section, which is mapped into the
+ * PT_NOTE segment. Because notes for a given name are grouped into
+ * the same section, they'll all be adjacent in the output file.
+ *
+ * This file defines macros for both C and assembler use. Their
+ * syntax is slightly different, but they're semantically similar.
+ *
+ * See the ELF specification for more detail about ELF notes.
+ */
+
+#ifdef __ASSEMBLER__
+/*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+ * desc data with appropriate padding. The 'desctype' argument is the
+ * assembler pseudo op defining the type of the data e.g. .asciz while
+ * 'descdata' is the data itself e.g. "hello, world".
+ *
+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
+ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
+ */
+#define ELFNOTE(name, type, desctype, descdata) \
+.pushsection .note.name ; \
+ .align 4 ; \
+ .long 2f - 1f /* namesz */ ; \
+ .long 4f - 3f /* descsz */ ; \
+ .long type ; \
+1:.asciz "name" ; \
+2:.align 4 ; \
+3:desctype descdata ; \
+4:.align 4 ; \
+.popsection ;
+#else /* !__ASSEMBLER__ */
+#include <linux/elf.h>
+/*
+ * Use an anonymous structure which matches the shape of
+ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
+ * type of name and desc depend on the macro arguments. "name" must
+ * be a literal string, and "desc" must be passed by value. You may
+ * only define one note per line, since __LINE__ is used to generate
+ * unique symbols.
+ */
+#define _ELFNOTE_PASTE(a,b) a##b
+#define _ELFNOTE(size, name, unique, type, desc) \
+ static const struct { \
+ struct elf##size##_note _nhdr; \
+ unsigned char _name[sizeof(name)] \
+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+ typeof(desc) _desc \
+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+ } _ELFNOTE_PASTE(_note_, unique) \
+ __attribute_used__ \
+ __attribute__((section(".note." name), \
+ aligned(sizeof(Elf##size##_Word)), \
+ unused)) = { \
+ { \
+ sizeof(name), \
+ sizeof(desc), \
+ type, \
+ }, \
+ name, \
+ desc \
+ }
+#define ELFNOTE(size, name, type, desc) \
+ _ELFNOTE(size, name, __LINE__, type, desc)
+
+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
+#endif /* __ASSEMBLER__ */
+
+#endif /* _LINUX_ELFNOTE_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e60844484..8b34aabfe4c6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,16 @@ struct vm_area_struct;
/*
* GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
*/
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
#define __GFP_DMA ((__force gfp_t)0x01u)
#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
-#ifdef CONFIG_DMA_IS_DMA32
-#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */
-#elif BITS_PER_LONG < 64
-#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */
-#else
-#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */
-#endif
+#define __GFP_DMA32 ((__force gfp_t)0x04u)
/*
* Action modifiers - doesn't change the zoning
@@ -46,6 +45,7 @@ struct vm_area_struct;
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +54,7 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -67,6 +67,8 @@ struct vm_area_struct;
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
+#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
platforms, used as appropriate on others */
@@ -76,11 +78,19 @@ struct vm_area_struct;
#define GFP_DMA32 __GFP_DMA32
-static inline int gfp_zone(gfp_t gfp)
+static inline enum zone_type gfp_zone(gfp_t flags)
{
- int zone = GFP_ZONEMASK & (__force int) gfp;
- BUG_ON(zone >= GFP_ZONETYPES);
- return zone;
+ if (flags & __GFP_DMA)
+ return ZONE_DMA;
+#ifdef CONFIG_ZONE_DMA32
+ if (flags & __GFP_DMA32)
+ return ZONE_DMA32;
+#endif
+#ifdef CONFIG_HIGHMEM
+ if (flags & __GFP_HIGHMEM)
+ return ZONE_HIGHMEM;
+#endif
+ return ZONE_NORMAL;
}
/*
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef9a512..fd7d12daa94f 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page)
/* declarations for linux/mm/highmem.c */
unsigned int nr_free_highpages(void);
+extern unsigned long totalhigh_pages;
#else /* CONFIG_HIGHMEM */
static inline unsigned int nr_free_highpages(void) { return 0; }
+#define totalhigh_pages 0
+
+#ifndef ARCH_HAS_KMAP
static inline void *kmap(struct page *page)
{
might_sleep();
@@ -41,6 +45,7 @@ static inline void *kmap(struct page *page)
#define kunmap_atomic(addr, idx) do { } while (0)
#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn))
#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
+#endif
#endif /* CONFIG_HIGHMEM */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d901e9c2..48d3cb3b6a47 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
* Monolithic do_IRQ implementation.
* (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
*/
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+#endif
/*
* Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
{
struct irq_desc *desc = irq_desc + irq;
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+ desc->handle_irq(irq, desc, regs);
+#else
if (likely(desc->handle_irq))
desc->handle_irq(irq, desc, regs);
else
__do_IRQ(irq, regs);
+#endif
}
/* Handling of unhandled and spurious interrupts: */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4fdce8b..e44a37e2c71c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
#define KERN_EMERG "<0>" /* system is unusable */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0a443d..09f0f575ddff 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr);
extern unsigned slab_node(struct mempolicy *policy);
-extern int policy_zone;
+extern enum zone_type policy_zone;
-static inline void check_highest_zone(int k)
+static inline void check_highest_zone(enum zone_type k)
{
if (k > policy_zone)
policy_zone = k;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a000d2..856f0ee7e84a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
struct mempolicy;
struct anon_vma;
@@ -218,7 +219,8 @@ struct inode;
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
* moment. Note that we have no way to track which tasks are using
- * a page.
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
*/
struct page {
unsigned long flags; /* Atomic flags, some possibly
@@ -278,6 +280,12 @@ struct page {
*/
#include <linux/page-flags.h>
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(condition) do { } while(0)
+#endif
+
/*
* Methods to modify the page usage count.
*
@@ -292,12 +300,11 @@ struct page {
*/
/*
- * Drop a ref, return true if the logical refcount fell to zero (the page has
- * no users)
+ * Drop a ref, return true if the refcount fell to zero (the page has no users)
*/
static inline int put_page_testzero(struct page *page)
{
- BUG_ON(atomic_read(&page->_count) == 0);
+ VM_BUG_ON(atomic_read(&page->_count) == 0);
return atomic_dec_and_test(&page->_count);
}
@@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page)
*/
static inline int get_page_unless_zero(struct page *page)
{
+ VM_BUG_ON(PageCompound(page));
return atomic_inc_not_zero(&page->_count);
}
-extern void FASTCALL(__page_cache_release(struct page *));
-
static inline int page_count(struct page *page)
{
if (unlikely(PageCompound(page)))
@@ -323,6 +329,7 @@ static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
+ VM_BUG_ON(atomic_read(&page->_count) == 0);
atomic_inc(&page->_count);
}
@@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
* For the non-reserved pages, page_count(page) denotes a reference count.
* page_count() == 0 means the page is free. page->lru is then used for
* freelist management in the buddy allocator.
- * page_count() == 1 means the page is used for exactly one purpose
- * (e.g. a private data page of one process).
+ * page_count() > 0 means the page has been allocated.
+ *
+ * Pages are allocated by the slab allocator in order to provide memory
+ * to kmalloc and kmem_cache_alloc. In this case, the management of the
+ * page, and the fields in 'struct page' are the responsibility of mm/slab.c
+ * unless a particular usage is carefully commented. (the responsibility of
+ * freeing the kmalloc memory is the caller's, of course).
*
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page(). In this case the page_count() is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
+ * A page may be used by anyone else who does a __get_free_page().
+ * In this case, page_count still tracks the references, and should only
+ * be used through the normal accessor functions. The top bits of page->flags
+ * and page->virtual store page management information, but all other fields
+ * are unused and could be used privately, carefully. The management of this
+ * page is the responsibility of the one who allocated it, and those who have
+ * subsequently been given references to it.
*
- * The other pages (we may call them "process pages") are completely
+ * The other pages (we may call them "pagecache pages") are completely
* managed by the Linux memory manager: I/O, buffers, swapping etc.
* The following discussion applies only to them.
*
- * A page may belong to an inode's memory mapping. In this case,
- * page->mapping is the pointer to the inode, and page->index is the
- * file offset of the page, in units of PAGE_CACHE_SIZE.
+ * A pagecache page contains an opaque `private' member, which belongs to the
+ * page's address_space. Usually, this is the address of a circular list of
+ * the page's disk buffers. PG_private must be set to tell the VM to call
+ * into the filesystem to release these pages.
*
- * A page contains an opaque `private' member, which belongs to the
- * page's address_space. Usually, this is the address of a circular
- * list of the page's disk buffers.
+ * A page may belong to an inode's memory mapping. In this case, page->mapping
+ * is the pointer to the inode, and page->index is the file offset of the page,
+ * in units of PAGE_CACHE_SIZE.
*
- * For pages belonging to inodes, the page_count() is the number of
- * attaches, plus 1 if `private' contains something, plus one for
- * the page cache itself.
+ * If pagecache pages are not associated with an inode, they are said to be
+ * anonymous pages. These may become associated with the swapcache, and in that
+ * case PG_swapcache is set, and page->private is an offset into the swapcache.
*
- * Instead of keeping dirty/clean pages in per address-space lists, we instead
- * now tag pages as dirty/under writeback in the radix tree.
+ * In either case (swapcache or inode backed), the pagecache itself holds one
+ * reference to the page. Setting PG_private should also increment the
+ * refcount. Each user mapping also has a reference to the page.
*
- * There is also a per-mapping radix tree mapping index to the page
- * in memory if present. The tree is rooted at mapping->root.
+ * The pagecache pages are stored in a per-mapping radix tree, which is
+ * rooted at mapping->page_tree, and indexed by offset.
+ * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
+ * lists, we instead now tag pages as dirty/writeback in the radix tree.
*
- * All process pages can do I/O:
+ * All pagecache pages may be subject to I/O:
* - inode pages may need to be read from disk,
* - inode pages which have been modified and are MAP_SHARED may need
- * to be written to disk,
- * - private pages which have been modified may need to be swapped out
- * to swap space and (later) to be read back into memory.
+ * to be written back to the inode on disk,
+ * - anonymous pages (including MAP_PRIVATE file mappings) which have been
+ * modified may need to be swapped out to swap space and (later) to be read
+ * back into memory.
*/
/*
@@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order);
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1)
-static inline unsigned long page_zonenum(struct page *page)
+static inline enum zone_type page_zonenum(struct page *page)
{
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
@@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page)
return zone_table[page_zone_id(page)];
}
+static inline unsigned long zone_to_nid(struct zone *zone)
+{
+ return zone->zone_pgdat->node_id;
+}
+
static inline unsigned long page_to_nid(struct page *page)
{
if (FLAGS_HAS_NODE)
return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
else
- return page_zone(page)->zone_pgdat->node_id;
+ return zone_to_nid(page_zone(page));
}
static inline unsigned long page_to_section(struct page *page)
{
return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
}
-static inline void set_page_zone(struct page *page, unsigned long zone)
+static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}
+
static inline void set_page_node(struct page *page, unsigned long node)
{
page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
}
-static inline void set_page_links(struct page *page, unsigned long zone,
+static inline void set_page_links(struct page *page, enum zone_type zone,
unsigned long node, unsigned long pfn)
{
set_page_zone(page, zone);
@@ -802,6 +827,39 @@ struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
+/*
+ * Some shared mappings will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+ unsigned int vm_flags = vma->vm_flags;
+
+ /* If it was private or non-writable, the write bit is already clear */
+ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+ return 0;
+
+ /* The backer wishes to know when pages are first written to? */
+ if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+ return 1;
+
+ /* The open routine did something to the protections already? */
+ if (pgprot_val(vma->vm_page_prot) !=
+ pgprot_val(protection_map[vm_flags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+ return 0;
+
+ /* Specialty mapping? */
+ if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+ return 0;
+
+ /* Can the mapping track the dirty pages? */
+ return vma->vm_file && vma->vm_file->f_mapping &&
+ mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c528e8..3693f1a52788 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
NR_FILE_PAGES,
- NR_SLAB, /* Pages used by slab allocator */
+ NR_SLAB_RECLAIMABLE,
+ NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_FILE_DIRTY,
NR_WRITEBACK,
@@ -88,53 +89,68 @@ struct per_cpu_pageset {
#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
#endif
-#define ZONE_DMA 0
-#define ZONE_DMA32 1
-#define ZONE_NORMAL 2
-#define ZONE_HIGHMEM 3
-
-#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */
-
+enum zone_type {
+ /*
+ * ZONE_DMA is used when there are devices that are not able
+ * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+ * carve out the portion of memory that is needed for these devices.
+ * The range is arch specific.
+ *
+ * Some examples
+ *
+ * Architecture Limit
+ * ---------------------------
+ * parisc, ia64, sparc <4G
+ * s390 <2G
+ * arm26 <48M
+ * arm Various
+ * alpha Unlimited or 0-16MB.
+ *
+ * i386, x86_64 and multiple other arches
+ * <16M.
+ */
+ ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
+ /*
+ * x86_64 needs two ZONE_DMAs because it supports devices that are
+ * only able to do DMA to the lower 16M but also 32 bit devices that
+ * can only do DMA to areas below 4G.
+ */
+ ZONE_DMA32,
+#endif
+ /*
+ * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+ * performed on pages in ZONE_NORMAL if the DMA devices support
+ * transfers to all addressable memory.
+ */
+ ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
+ /*
+ * A memory area that is only addressable by the kernel through
+ * mapping portions into its own address space. This is for example
+ * used by i386 to allow the kernel to address the memory beyond
+ * 900MB. The kernel will set up special mappings (page
+ * table entries on i386) for each page that the kernel needs to
+ * access.
+ */
+ ZONE_HIGHMEM,
+#endif
+ MAX_NR_ZONES
+};
/*
* When a memory allocation must conform to specific limitations (such
* as being suitable for DMA) the caller will pass in hints to the
* allocator in the gfp_mask, in the zone modifier bits. These bits
* are used to select a priority ordered list of memory zones which
- * match the requested limits. GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers. Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists). Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field. This allows us to reduce the
- * extent of the zonelists thus saving space. For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
*/
-#define GFP_ZONEMASK 0x07
-/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */
-#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA < 16 MB ISA DMA capable memory
- * ZONE_DMA32 0 MB Empty
- * ZONE_NORMAL 16-896 MB direct mapped by the kernel
- * ZONE_HIGHMEM > 896 MB only page cache and user processes
- */
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
+#else
+#define ZONES_SHIFT 2
+#endif
struct zone {
/* Fields commonly accessed by the page allocator */
@@ -154,7 +170,8 @@ struct zone {
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
- unsigned long min_unmapped_ratio;
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
struct per_cpu_pageset *pageset[NR_CPUS];
#else
struct per_cpu_pageset pageset[NR_CPUS];
@@ -266,7 +283,6 @@ struct zone {
char *name;
} ____cacheline_internodealigned_in_smp;
-
/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -304,7 +320,7 @@ struct zonelist {
struct bootmem_data;
typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES];
- struct zonelist node_zonelists[GFP_ZONETYPES];
+ struct zonelist node_zonelists[MAX_NR_ZONES];
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
struct page *node_mem_map;
@@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone)
return (!!zone->present_pages);
}
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
{
+#ifdef CONFIG_HIGHMEM
return (idx == ZONE_HIGHMEM);
+#else
+ return 0;
+#endif
}
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
{
return (idx == ZONE_NORMAL);
}
@@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx)
*/
static inline int is_highmem(struct zone *zone)
{
+#ifdef CONFIG_HIGHMEM
return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+ return 0;
+#endif
}
static inline int is_normal(struct zone *zone)
@@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone)
static inline int is_dma32(struct zone *zone)
{
+#ifdef CONFIG_ZONE_DMA32
return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+ return 0;
+#endif
}
static inline int is_dma(struct zone *zone)
@@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+ struct file *, void __user *, size_t *, loff_t *);
#include <linux/topology.h>
/* Returns the number of the current Node. */
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9a285cecf249..312bd2ffee33 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -10,6 +10,8 @@ header-y += xt_connmark.h
header-y += xt_CONNMARK.h
header-y += xt_conntrack.h
header-y += xt_dccp.h
+header-y += xt_dscp.h
+header-y += xt_DSCP.h
header-y += xt_esp.h
header-y += xt_helper.h
header-y += xt_length.h
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642e9f36..9d7921dd50f0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
* PG_reserved is set for special pages, which can never be swapped out. Some
* of them might not even exist (eg empty_bad_page)...
*
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for their own usage.
*
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
*
* PG_uptodate tells whether the page's contents is valid. When a read
* completes, the page becomes uptodate, unless a disk I/O error happened.
*
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table. This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
*
* PG_error is set to indicate that an I/O error occurred on this page.
*
@@ -42,6 +43,10 @@
* space, they need to be kmapped separately for doing IO on the pages. The
* struct page (these bits with information) are always mapped into kernel
* address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
*/
/*
@@ -74,7 +79,7 @@
#define PG_checked 8 /* kill me in 2.5.<early>. */
#define PG_arch_1 9
#define PG_reserved 10
-#define PG_private 11 /* Has something at ->private */
+#define PG_private 11 /* If pagecache, has fs-private data */
#define PG_writeback 12 /* Page is under writeback */
#define PG_nosave 13 /* Used for system suspend/resume */
@@ -83,7 +88,7 @@
#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
#define PG_reclaim 17 /* To be reclaimed asap */
-#define PG_nosave_free 18 /* Free, should not be written */
+#define PG_nosave_free 18 /* Used for system suspend/resume */
#define PG_buddy 19 /* Page is free, on buddy lists */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d27f60e..64f950925151 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(__lock_page_nosync(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
+/*
+ * lock_page may only be called if we have the page's inode pinned.
+ */
static inline void lock_page(struct page *page)
{
might_sleep();
if (TestSetPageLocked(page))
__lock_page(page);
}
+
+/*
+ * lock_page_nosync should only be used if we can't pin the page's inode.
+ * Doesn't play quite so well with block device plugging.
+ */
+static inline void lock_page_nosync(struct page *page)
+{
+ might_sleep();
+ if (TestSetPageLocked(page))
+ __lock_page_nosync(page);
+}
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a21f2a..3835a9642f13 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
#ifndef __LINUX_PERCPU_H
#define __LINUX_PERCPU_H
+
#include <linux/spinlock.h> /* For preempt_disable() */
#include <linux/slab.h> /* For kmalloc() */
#include <linux/smp.h>
#include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
#include <asm/percpu.h>
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -11,8 +14,14 @@
#define PERCPU_ENOUGH_ROOM 32768
#endif
-/* Must be an lvalue. */
-#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
+/*
+ * Must be an lvalue. Since @var must be a simple identifier,
+ * we force a syntax error here if it isn't.
+ */
+#define get_cpu_var(var) (*({ \
+ extern int simple_indentifier_##var(void); \
+ preempt_disable(); \
+ &__get_cpu_var(var); }))
#define put_cpu_var(var) preempt_enable()
#ifdef CONFIG_SMP
@@ -21,39 +30,77 @@ struct percpu_data {
void *ptrs[NR_CPUS];
};
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
/*
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu. Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
* probably be combined with get_cpu()/put_cpu().
*/
-#define per_cpu_ptr(ptr, cpu) \
-({ \
- struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
- (__typeof__(ptr))__p->ptrs[(cpu)]; \
+#define percpu_ptr(ptr, cpu) \
+({ \
+ struct percpu_data *__p = __percpu_disguise(ptr); \
+ (__typeof__(ptr))__p->ptrs[(cpu)]; \
})
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
#else /* CONFIG_SMP */
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+ int cpu)
{
- void *ret = kmalloc(size, GFP_KERNEL);
- if (ret)
- memset(ret, 0, size);
- return ret;
+ return percpu_ptr(__pdata, cpu);
}
-static inline void free_percpu(const void *ptr)
-{
- kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
+{
+ return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+ return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+ kfree(__pdata);
}
#endif /* CONFIG_SMP */
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+ __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+ __percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+ __percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
+ cpu_possible_map)
+#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr) percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4ade39..81e9299ca148 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
#ifdef CONFIG_PM_TRACE
+extern int pm_trace_enabled;
+
struct device;
extern void set_trace_device(struct device *);
extern void generate_resume_trace(void *tracedata, unsigned int user);
#define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do { \
- void *tracedata; \
- asm volatile("movl $1f,%0\n" \
- ".section .tracedata,\"a\"\n" \
- "1:\t.word %c1\n" \
- "\t.long %c2\n" \
- ".previous" \
- :"=r" (tracedata) \
- : "i" (__LINE__), "i" (__FILE__)); \
- generate_resume_trace(tracedata, user); \
+#define TRACE_RESUME(user) do { \
+ if (pm_trace_enabled) { \
+ void *tracedata; \
+ asm volatile("movl $1f,%0\n" \
+ ".section .tracedata,\"a\"\n" \
+ "1:\t.word %c1\n" \
+ "\t.long %c2\n" \
+ ".previous" \
+ :"=r" (tracedata) \
+ : "i" (__LINE__), "i" (__FILE__)); \
+ generate_resume_trace(tracedata, user); \
+ } \
} while (0)
#else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b0900014..db2c1df4fef9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
*/
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
#define page_referenced(page,l) TestClearPageReferenced(page)
#define try_to_unmap(page, refs) SWAP_FAIL
+static inline int page_mkclean(struct page *page)
+{
+ return 0;
+}
+
+
#endif /* CONFIG_MMU */
/*
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e390d6a5..d1b7ca6c1c57 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
/**
* selinux_audit_rule_match - determine if a context ID matches a rule.
- * @ctxid: the context ID to check
+ * @sid: the context ID to check
* @field: the field this rule refers to
* @op: the operater the rule uses
* @rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
* Returns 1 if the context id matches the rule, 0 if it does not, and
* -errno on failure.
*/
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx);
@@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
void selinux_audit_set_callback(int (*callback)(void));
/**
- * selinux_task_ctxid - determine a context ID for a process.
- * @tsk: the task object
- * @ctxid: ID value returned via this
- *
- * On return, ctxid will contain an ID for the context. This value
- * should only be used opaquely.
- */
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
-
-/**
- * selinux_ctxid_to_string - map a security context ID to a string
- * @ctxid: security context ID to be converted.
+ * selinux_sid_to_string - map a security context ID to a string
+ * @sid: security context ID to be converted.
* @ctx: address of context string to be returned
* @ctxlen: length of returned context string.
*
@@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
* string will be allocated internally, and the caller must call
* kfree() on it after use.
*/
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
/**
* selinux_get_inode_sid - get the inode's security context ID
@@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
return;
}
-static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx)
{
@@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
return;
}
-static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
- *ctxid = 0;
-}
-
-static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
{
*ctx = NULL;
*ctxlen = 0;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b70d1c..66d6eb78d1c6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
extern void kmem_cache_free(kmem_cache_t *, void *);
extern unsigned int kmem_cache_size(kmem_cache_t *);
extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
/* Size description struct for general caches. */
struct cache_sizes {
@@ -203,7 +202,30 @@ extern int slab_is_available(void);
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+ if (__builtin_constant_p(size)) {
+ int i = 0;
+#define CACHE(x) \
+ if (size <= x) \
+ goto found; \
+ else \
+ i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+ {
+ extern void __you_cannot_kmalloc_that_much(void);
+ __you_cannot_kmalloc_that_much();
+ }
+found:
+ return kmem_cache_alloc_node((flags & GFP_DMA) ?
+ malloc_sizes[i].cs_dmacachep :
+ malloc_sizes[i].cs_cachep, flags, node);
+ }
+ return __kmalloc_node(size, flags, node);
+}
#else
static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
{
@@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
/* SLOB allocator routines */
void kmem_cache_init(void);
-struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
unsigned long,
void (*)(void *, struct kmem_cache *, unsigned long),
@@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep;
extern kmem_cache_t *sighand_cachep;
extern kmem_cache_t *bio_cachep;
-extern atomic_t slab_reclaim_pages;
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bce1349..51649987f691 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
*/
int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
+int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
+ int retry, int wait);
+
/*
* Call a function on all processors
*/
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa64cc7..b1237f16ecde 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,29 +10,11 @@
#include <linux/pm.h>
/* page backup entry */
-typedef struct pbe {
+struct pbe {
unsigned long address; /* address of the copy */
unsigned long orig_address; /* original address of page */
struct pbe *next;
-} suspend_pagedir_t;
-
-#define for_each_pbe(pbe, pblist) \
- for (pbe = pblist ; pbe ; pbe = pbe->next)
-
-#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe))
-#define PB_PAGE_SKIP (PBES_PER_PAGE-1)
-
-#define for_each_pb_page(pbe, pblist) \
- for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
-
-
-#define SWAP_FILENAME_MAXLENGTH 32
-
-
-extern dev_t swsusp_resume_device;
-
-/* mm/vmscan.c */
-extern int shrink_mem(void);
+};
/* mm/page_alloc.c */
extern void drain_local_pages(void);
@@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {}
static inline int software_suspend(void)
{
printk("Warning: fake suspend called\n");
- return -EPERM;
+ return -ENOSYS;
}
#endif /* CONFIG_PM */
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
void save_processor_state(void);
void restore_processor_state(void);
struct saved_context;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184c9096..e7c36ba2a2db 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,10 @@
#include <asm/atomic.h>
#include <asm/page.h>
+struct notifier_block;
+
+struct bio;
+
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0
@@ -156,13 +160,14 @@ struct swap_list_t {
/* linux/mm/oom_kill.c */
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern int register_oom_notifier(struct notifier_block *nb);
+extern int unregister_oom_notifier(struct notifier_block *nb);
/* linux/mm/memory.c */
extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
-extern unsigned long totalhigh_pages;
extern unsigned long totalreserve_pages;
extern long nr_swap_pages;
extern unsigned int nr_free_pages(void);
@@ -190,6 +195,7 @@ extern long vm_total_pages;
#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
#else
#define zone_reclaim_mode 0
@@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
/* linux/mm/page_io.c */
extern int swap_readpage(struct file *, struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
+extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+ struct bio **bio_chain);
+extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
/* linux/mm/swap_state.c */
extern struct address_space swapper_space;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f8..eca555781d05 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
+ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
};
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363caaaf..dee88c6b6fa7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot);
-extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
- pgprot_t prot, int node);
extern void vfree(void *addr);
extern void *vmap(struct page **pages, unsigned int count,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b60798a..176c7f797339 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
* generated will simply be the increment of a global address.
*/
-#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+#ifdef CONFIG_ZONE_DMA32
+#define DMA32_ZONE(xx) xx##_DMA32,
+#else
+#define DMA32_ZONE(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#else
+#define HIGHMEM_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
@@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node,
struct zone *zones = NODE_DATA(node)->node_zones;
return
-#ifndef CONFIG_DMA_IS_NORMAL
-#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+#ifdef CONFIG_ZONE_DMA32
zone_page_state(&zones[ZONE_DMA32], item) +
#endif
zone_page_state(&zones[ZONE_NORMAL], item) +
-#endif
#ifdef CONFIG_HIGHMEM
zone_page_state(&zones[ZONE_HIGHMEM], item) +
#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036af4eb..56a23a0e7f2e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
+void set_page_dirty_balance(struct page *page);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 59406e0dc5b2..2d72496c2029 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -130,8 +130,9 @@ extern int cipso_v4_rbm_strictvalid;
int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head));
struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
-struct sk_buff *cipso_v4_doi_dump_all(size_t headroom);
-struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom);
+int cipso_v4_doi_walk(u32 *skip_cnt,
+ int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
+ void *cb_arg);
int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
const char *domain);
@@ -152,14 +153,11 @@ static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
return NULL;
}
-static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+static inline int cipso_v4_doi_walk(u32 *skip_cnt,
+ int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
+ void *cb_arg)
{
- return NULL;
-}
-
-static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
-{
- return NULL;
+ return 0;
}
static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def,
@@ -205,6 +203,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
int cipso_v4_socket_setattr(const struct socket *sock,
const struct cipso_v4_doi *doi_def,
const struct netlbl_lsm_secattr *secattr);
+int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
int cipso_v4_socket_getattr(const struct socket *sock,
struct netlbl_lsm_secattr *secattr);
int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
@@ -225,6 +224,12 @@ static inline int cipso_v4_socket_setattr(const struct socket *sock,
return -ENOSYS;
}
+static inline int cipso_v4_sock_getattr(struct sock *sk,
+ struct netlbl_lsm_secattr *secattr)
+{
+ return -ENOSYS;
+}
+
static inline int cipso_v4_socket_getattr(const struct socket *sock,
struct netlbl_lsm_secattr *secattr)
{
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index dd5780b36919..6692430063fd 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -57,9 +57,8 @@
* The payload is dependent on the subsystem specified in the
* 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions
* should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c
- * file. All of the fields in the NetLabel payload are NETLINK attributes, the
- * length of each field is the length of the NETLINK attribute payload, see
- * include/net/netlink.h for more information on NETLINK attributes.
+ * file. All of the fields in the NetLabel payload are NETLINK attributes, see
+ * the include/net/netlink.h file for more information on NETLINK attributes.
*
*/
@@ -82,50 +81,6 @@
#define NETLBL_NLTYPE_UNLABELED 5
#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL"
-/* NetLabel return codes */
-#define NETLBL_E_OK 0
-
-/*
- * Helper functions
- */
-
-#define NETLBL_LEN_U8 nla_total_size(sizeof(u8))
-#define NETLBL_LEN_U16 nla_total_size(sizeof(u16))
-#define NETLBL_LEN_U32 nla_total_size(sizeof(u32))
-
-/**
- * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer
- * @head: the amount of headroom in bytes
- * @body: the desired size (minus headroom) in bytes
- * @gfp_flags: the alloc flags to pass to alloc_skb()
- *
- * Description:
- * Allocate a NETLINK message buffer based on the sizes given in @head and
- * @body. If @head is greater than zero skb_reserve() is called to reserve
- * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on
- * success, NULL on failure.
- *
- */
-static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head,
- size_t body,
- gfp_t gfp_flags)
-{
- struct sk_buff *skb;
-
- skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags);
- if (skb == NULL)
- return NULL;
- if (head > 0) {
- skb_reserve(skb, head);
- if (skb_tailroom(skb) < body) {
- kfree_skb(skb);
- return NULL;
- }
- }
-
- return skb;
-}
-
/*
* NetLabel - Kernel API for accessing the network packet label mappings.
*
@@ -238,6 +193,8 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr,
#ifdef CONFIG_NETLABEL
int netlbl_socket_setattr(const struct socket *sock,
const struct netlbl_lsm_secattr *secattr);
+int netlbl_sock_getattr(struct sock *sk,
+ struct netlbl_lsm_secattr *secattr);
int netlbl_socket_getattr(const struct socket *sock,
struct netlbl_lsm_secattr *secattr);
int netlbl_skbuff_getattr(const struct sk_buff *skb,
@@ -250,6 +207,12 @@ static inline int netlbl_socket_setattr(const struct socket *sock,
return -ENOSYS;
}
+static inline int netlbl_sock_getattr(struct sock *sk,
+ struct netlbl_lsm_secattr *secattr)
+{
+ return -ENOSYS;
+}
+
static inline int netlbl_socket_getattr(const struct socket *sock,
struct netlbl_lsm_secattr *secattr)
{
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 11dc2e7f679a..4ab68a7a636a 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -146,11 +146,13 @@
* nla_ok(nla, remaining) does nla fit into remaining bytes?
* nla_next(nla, remaining) get next netlink attribute
* nla_validate() validate a stream of attributes
+ * nla_validate_nested() validate a stream of nested attributes
* nla_find() find attribute in stream of attributes
* nla_find_nested() find attribute in nested attributes
* nla_parse() parse and validate stream of attrs
* nla_parse_nested() parse nested attribuets
* nla_for_each_attr() loop over all attributes
+ * nla_for_each_nested() loop over the nested attributes
*=========================================================================
*/
@@ -950,6 +952,24 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
}
/**
+ * nla_validate_nested - Validate a stream of nested attributes
+ * @start: container attribute
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ *
+ * Validates all attributes in the nested attribute stream against the
+ * specified policy. Attributes with a type exceeding maxtype will be
+ * ignored. See documentation of struct nla_policy for more details.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static inline int nla_validate_nested(struct nlattr *start, int maxtype,
+ struct nla_policy *policy)
+{
+ return nla_validate(nla_data(start), nla_len(start), maxtype, policy);
+}
+
+/**
* nla_for_each_attr - iterate over a stream of attributes
* @pos: loop counter, set to current attribute
* @head: head of attribute stream
diff --git a/kernel/audit.c b/kernel/audit.c
index 963fd15c9621..f9889ee77825 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (status_get->mask & AUDIT_STATUS_PID) {
int old = audit_pid;
if (sid) {
- if ((err = selinux_ctxid_to_string(
+ if ((err = selinux_sid_to_string(
sid, &ctx, &len)))
return err;
else
@@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
"user pid=%d uid=%u auid=%u",
pid, uid, loginuid);
if (sid) {
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
sid, &ctx, &len)) {
audit_log_format(ab,
" ssid=%u", sid);
@@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
loginuid, sid);
break;
case AUDIT_SIGNAL_INFO:
- err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+ err = selinux_sid_to_string(audit_sig_sid, &ctx, &len);
if (err)
return err;
sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a44879b0c72f..1a58a81fb09d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
if (sid) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(sid, &ctx, &len))
+ if (selinux_sid_to_string(sid, &ctx, &len))
audit_log_format(ab, " ssid=%u", sid);
else
audit_log_format(ab, " subj=%s", ctx);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1bd8827a0102..fb83c5cb8c32 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk,
logged upon error */
if (f->se_rule) {
if (need_sid) {
- selinux_task_ctxid(tsk, &sid);
+ selinux_get_task_sid(tsk, &sid);
need_sid = 0;
}
result = selinux_audit_rule_match(sid, f->type,
@@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (axi->osid != 0) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
axi->osid, &ctx, &len)) {
audit_log_format(ab, " osid=%u",
axi->osid);
@@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (n->osid != 0) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
n->osid, &ctx, &len)) {
audit_log_format(ab, " osid=%u", n->osid);
call_panic = 2;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f230f9ae01c2..32c96628463e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
#ifdef CONFIG_HOTPLUG_CPU
/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
return 0;
}
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
{
int err;
struct task_struct *p;
cpumask_t old_allowed, tmp;
- mutex_lock(&cpu_add_remove_lock);
- if (num_online_cpus() == 1) {
- err = -EBUSY;
- goto out;
- }
+ if (num_online_cpus() == 1)
+ return -EBUSY;
- if (!cpu_online(cpu)) {
- err = -EINVAL;
- goto out;
- }
+ if (!cpu_online(cpu))
+ return -EINVAL;
err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
__FUNCTION__, cpu);
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
err = kthread_stop(p);
out_allowed:
set_cpus_allowed(current, old_allowed);
-out:
+ return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+ int err = 0;
+
+ mutex_lock(&cpu_add_remove_lock);
+ if (cpu_hotplug_disabled)
+ err = -EBUSY;
+ else
+ err = _cpu_down(cpu);
+
mutex_unlock(&cpu_add_remove_lock);
return err;
}
#endif /*CONFIG_HOTPLUG_CPU*/
-int __devinit cpu_up(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __devinit _cpu_up(unsigned int cpu)
{
int ret;
void *hcpu = (void *)(long)cpu;
- mutex_lock(&cpu_add_remove_lock);
- if (cpu_online(cpu) || !cpu_present(cpu)) {
- ret = -EINVAL;
- goto out;
- }
+ if (cpu_online(cpu) || !cpu_present(cpu))
+ return -EINVAL;
ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
if (ret != 0)
blocking_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED, hcpu);
+
+ return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+ int err = 0;
+
+ mutex_lock(&cpu_add_remove_lock);
+ if (cpu_hotplug_disabled)
+ err = -EBUSY;
+ else
+ err = _cpu_up(cpu);
+
+ mutex_unlock(&cpu_add_remove_lock);
+ return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+/* Offline all CPUs but the first present one; returns 0 or an error code. */
+int disable_nonboot_cpus(void)
+{
+ int cpu, first_cpu, error;
+
+ mutex_lock(&cpu_add_remove_lock);
+ first_cpu = first_cpu(cpu_present_map);
+ if (!cpu_online(first_cpu)) {
+ error = _cpu_up(first_cpu);
+ if (error) {
+ printk(KERN_ERR "Could not bring CPU%d up.\n",
+ first_cpu);
+ goto out;
+ }
+ }
+ error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+ if (error) {
+ printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+ goto out;
+ }
+ /* Take all non-boot CPUs down in one shot, under the lock, so that
+ * userspace cannot use the CPU hotplug at the same time */
+ cpus_clear(frozen_cpus);
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+ continue;
+ error = _cpu_down(cpu);
+ if (!error) {
+ cpu_set(cpu, frozen_cpus);
+ printk("CPU%d is down\n", cpu);
+ } else {
+ printk(KERN_ERR "Error taking CPU%d down: %d\n",
+ cpu, error);
+ break;
+ }
+ }
+ if (!error) {
+ BUG_ON(num_online_cpus() > 1);
+ /* Make sure the CPUs won't be enabled by someone else */
+ cpu_hotplug_disabled = 1;
+ } else {
+ printk(KERN_ERR "Non-boot CPUs are not disabled\n");
+ }
out:
mutex_unlock(&cpu_add_remove_lock);
- return ret;
+ return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+ int cpu, error;
+
+ /* Allow everyone to use the CPU hotplug again */
+ mutex_lock(&cpu_add_remove_lock);
+ cpu_hotplug_disabled = 0;
+ mutex_unlock(&cpu_add_remove_lock);
+
+ printk("Enabling non-boot CPUs ...\n");
+ for_each_cpu_mask(cpu, frozen_cpus) {
+ error = cpu_up(cpu);
+ if (!error) {
+ printk("CPU%d is up\n", cpu);
+ continue;
+ }
+ printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+ cpu, error);
+ }
+ cpus_clear(frozen_cpus);
}
+#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc5..cff41511269f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
int i;
for (i = 0; zl->zones[i]; i++) {
- int nid = zl->zones[i]->zone_pgdat->node_id;
+ int nid = zone_to_nid(zl->zones[i]);
if (node_isset(nid, current->mems_allowed))
return 1;
@@ -2316,9 +2316,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
const struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
- if (in_interrupt())
+ if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
return 1;
- node = z->zone_pgdat->node_id;
+ node = zone_to_nid(z);
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed))
return 1;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 48a53f68af96..4c6cdbaed661 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
return retval;
}
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
/**
* __do_IRQ - original all in one highlevel IRQ handler
* @irq: the interrupt number
@@ -253,6 +254,7 @@ out:
return 1;
}
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/kernel/module.c b/kernel/module.c
index 2a19cd47c046..b7fe6e840963 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1054,6 +1054,12 @@ static int mod_sysfs_setup(struct module *mod,
{
int err;
+ if (!module_subsys.kset.subsys) {
+ printk(KERN_ERR "%s: module_subsys not initialized\n",
+ mod->name);
+ err = -EINVAL;
+ goto out;
+ }
memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name);
if (err)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 619ecabf7c58..4b6e2f18e056 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,17 @@ config PM_DEBUG
code. This is helpful when debugging and reporting various PM bugs,
like suspend support.
+config DISABLE_CONSOLE_SUSPEND
+ bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
+ depends on PM && PM_DEBUG
+ default n
+ ---help---
+ This option turns off the console suspend mechanism that prevents
+ debug messages from reaching the console during the suspend/resume
+ operations. This may be helpful when debugging device drivers'
+ suspend/resume routines, but may itself lead to problems, for example
+ if netconsole is used.
+
config PM_TRACE
bool "Suspend/resume event tracing"
depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 8d0af3d37a4b..38725f526afc 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,4 @@ obj-y := main.o process.o console.o
obj-$(CONFIG_PM_LEGACY) += pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o
-obj-$(CONFIG_SUSPEND_SMP) += smp.o
-
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e13e74067845..7c7b9b65e365 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pm.h>
+#include <linux/cpu.h>
#include "power.h"
@@ -72,7 +73,10 @@ static int prepare_processes(void)
int error;
pm_prepare_console();
- disable_nonboot_cpus();
+
+ error = disable_nonboot_cpus();
+ if (error)
+ goto enable_cpus;
if (freeze_processes()) {
error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
return 0;
thaw:
thaw_processes();
+enable_cpus:
enable_nonboot_cpus();
pm_restore_console();
return error;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6d295c776794..873228c71dab 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -16,6 +16,8 @@
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/resume-trace.h>
#include "power.h"
@@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops)
static int suspend_prepare(suspend_state_t state)
{
- int error = 0;
+ int error;
unsigned int free_pages;
if (!pm_ops || !pm_ops->enter)
@@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state)
pm_prepare_console();
- disable_nonboot_cpus();
-
- if (num_online_cpus() != 1) {
- error = -EPERM;
+ error = disable_nonboot_cpus();
+ if (error)
goto Enable_cpu;
- }
if (freeze_processes()) {
error = -EAGAIN;
@@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
power_attr(state);
+#ifdef CONFIG_PM_TRACE
+int pm_trace_enabled;
+
+static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
+{
+ return sprintf(buf, "%d\n", pm_trace_enabled);
+}
+
+static ssize_t
+pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
+ pm_trace_enabled = !!val;
+ return n;
+ }
+ return -EINVAL;
+}
+
+power_attr(pm_trace);
+
+static struct attribute * g[] = {
+ &state_attr.attr,
+ &pm_trace_attr.attr,
+ NULL,
+};
+#else
static struct attribute * g[] = {
&state_attr.attr,
NULL,
};
+#endif /* CONFIG_PM_TRACE */
static struct attribute_group attr_group = {
.attrs = g,
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 57a792982fb9..bfe999f7b272 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -38,8 +38,6 @@ extern struct subsystem power_subsys;
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
-extern struct pbe *pagedir_nosave;
-
/* Preferred image size in bytes (default 500 MB) */
extern unsigned long image_size;
extern int in_suspend;
@@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void);
extern unsigned int count_data_pages(void);
+/**
+ * Auxiliary structure used for reading the snapshot image data and
+ * metadata from and writing them to the list of page backup entries
+ * (PBEs) which is the main data structure of swsusp.
+ *
+ * Using struct snapshot_handle we can transfer the image, including its
+ * metadata, as a continuous sequence of bytes with the help of
+ * snapshot_read_next() and snapshot_write_next().
+ *
+ * The code that writes the image to a storage or transfers it to
+ * the user land is required to use snapshot_read_next() for this
+ * purpose and it should not make any assumptions regarding the internal
+ * structure of the image. Similarly, the code that reads the image from
+ * a storage or transfers it from the user land is required to use
+ * snapshot_write_next().
+ *
+ * This may allow us to change the internal structure of the image
+ * in the future with considerably less effort.
+ */
+
struct snapshot_handle {
- loff_t offset;
- unsigned int page;
- unsigned int page_offset;
- unsigned int prev;
- struct pbe *pbe, *last_pbe;
- void *buffer;
- unsigned int buf_offset;
+ loff_t offset; /* number of the last byte ready for reading
+ * or writing in the sequence
+ */
+ unsigned int cur; /* number of the block of PAGE_SIZE bytes the
+ * next operation will refer to (i.e. current)
+ */
+ unsigned int cur_offset; /* offset with respect to the current
+ * block (for the next operation)
+ */
+ unsigned int prev; /* number of the block of PAGE_SIZE bytes that
+ * was the current one previously
+ */
+ void *buffer; /* address of the block to read from
+ * or write to
+ */
+ unsigned int buf_offset; /* location to read from or write to,
+ * given as a displacement from 'buffer'
+ */
+ int sync_read; /* Set to one to notify the caller of
+ * snapshot_write_next() that it may
+ * need to call wait_on_bio_chain()
+ */
};
+/* This macro returns the address from/to which the caller of
+ * snapshot_read_next()/snapshot_write_next() is allowed to
+ * read/write data after the function returns
+ */
#define data_of(handle) ((handle).buffer + (handle).buf_offset)
+extern unsigned int snapshot_additional_pages(struct zone *zone);
extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
-int snapshot_image_loaded(struct snapshot_handle *handle);
+extern int snapshot_image_loaded(struct snapshot_handle *handle);
+extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
#define SNAPSHOT_IOC_MAGIC '3'
#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
deleted file mode 100644
index 5957312b2d68..000000000000
--- a/kernel/power/smp.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * drivers/power/smp.c - Functions for stopping other CPUs.
- *
- * Copyright 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
- *
- * This file is released under the GPLv2.
- */
-
-#undef DEBUG
-
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <asm/atomic.h>
-#include <asm/tlbflush.h>
-
-/* This is protected by pm_sem semaphore */
-static cpumask_t frozen_cpus;
-
-void disable_nonboot_cpus(void)
-{
- int cpu, error;
-
- error = 0;
- cpus_clear(frozen_cpus);
- printk("Freezing cpus ...\n");
- for_each_online_cpu(cpu) {
- if (cpu == 0)
- continue;
- error = cpu_down(cpu);
- if (!error) {
- cpu_set(cpu, frozen_cpus);
- printk("CPU%d is down\n", cpu);
- continue;
- }
- printk("Error taking cpu %d down: %d\n", cpu, error);
- }
- BUG_ON(raw_smp_processor_id() != 0);
- if (error)
- panic("cpus not sleeping");
-}
-
-void enable_nonboot_cpus(void)
-{
- int cpu, error;
-
- printk("Thawing cpus ...\n");
- for_each_cpu_mask(cpu, frozen_cpus) {
- error = cpu_up(cpu);
- if (!error) {
- printk("CPU%d is up\n", cpu);
- continue;
- }
- printk("Error taking cpu %d up: %d\n", cpu, error);
- panic("Not enough cpus");
- }
- cpus_clear(frozen_cpus);
-}
-
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 75d4886e648e..1b84313cbab5 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -34,10 +34,12 @@
#include "power.h"
-struct pbe *pagedir_nosave;
+/* List of PBEs used for creating and restoring the suspend image */
+struct pbe *restore_pblist;
+
static unsigned int nr_copy_pages;
static unsigned int nr_meta_pages;
-static unsigned long *buffer;
+static void *buffer;
#ifdef CONFIG_HIGHMEM
unsigned int count_highmem_pages(void)
@@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;}
static inline int restore_highmem(void) {return 0;}
#endif
-static int pfn_is_nosave(unsigned long pfn)
+/**
+ * @safe_needed - on resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * used before suspend.
+ *
+ * The unsafe pages are marked with the PG_nosave_free flag
+ * and we count them using allocated_unsafe_pages
+ */
+
+#define PG_ANY 0
+#define PG_SAFE 1
+#define PG_UNSAFE_CLEAR 1
+#define PG_UNSAFE_KEEP 0
+
+static unsigned int allocated_unsafe_pages;
+
+static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
{
- unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
- unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
- return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+ void *res;
+
+ res = (void *)get_zeroed_page(gfp_mask);
+ if (safe_needed)
+ while (res && PageNosaveFree(virt_to_page(res))) {
+ /* The page is unsafe, mark it for swsusp_free() */
+ SetPageNosave(virt_to_page(res));
+ allocated_unsafe_pages++;
+ res = (void *)get_zeroed_page(gfp_mask);
+ }
+ if (res) {
+ SetPageNosave(virt_to_page(res));
+ SetPageNosaveFree(virt_to_page(res));
+ }
+ return res;
+}
+
+unsigned long get_safe_page(gfp_t gfp_mask)
+{
+ return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE);
}
/**
- * saveable - Determine whether a page should be cloned or not.
- * @pfn: The page
- *
- * We save a page if it's Reserved, and not in the range of pages
- * statically defined as 'unsaveable', or if it isn't reserved, and
- * isn't part of a free chunk of pages.
+ * free_image_page - free page represented by @addr, allocated with
+ * alloc_image_page (page flags set by it must be cleared)
*/
-static int saveable(struct zone *zone, unsigned long *zone_pfn)
+static inline void free_image_page(void *addr, int clear_nosave_free)
{
- unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
- struct page *page;
+ ClearPageNosave(virt_to_page(addr));
+ if (clear_nosave_free)
+ ClearPageNosaveFree(virt_to_page(addr));
+ free_page((unsigned long)addr);
+}
- if (!pfn_valid(pfn))
- return 0;
+/* struct linked_page is used to build chains of pages */
- page = pfn_to_page(pfn);
- BUG_ON(PageReserved(page) && PageNosave(page));
- if (PageNosave(page))
- return 0;
- if (PageReserved(page) && pfn_is_nosave(pfn))
- return 0;
- if (PageNosaveFree(page))
- return 0;
+#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
- return 1;
-}
+struct linked_page {
+ struct linked_page *next;
+ char data[LINKED_PAGE_DATA_SIZE];
+} __attribute__((packed));
-unsigned int count_data_pages(void)
+static inline void
+free_list_of_pages(struct linked_page *list, int clear_page_nosave)
{
- struct zone *zone;
- unsigned long zone_pfn;
- unsigned int n = 0;
+ while (list) {
+ struct linked_page *lp = list->next;
- for_each_zone (zone) {
- if (is_highmem(zone))
- continue;
- mark_free_pages(zone);
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
- n += saveable(zone, &zone_pfn);
+ free_image_page(list, clear_page_nosave);
+ list = lp;
}
- return n;
}
-static void copy_data_pages(struct pbe *pblist)
+/**
+ * struct chain_allocator is used for allocating small objects out of
+ * a linked list of pages called 'the chain'.
+ *
+ * The chain grows each time when there is no room for a new object in
+ * the current page. The allocated objects cannot be freed individually.
+ * It is only possible to free them all at once, by freeing the entire
+ * chain.
+ *
+ * NOTE: The chain allocator may be inefficient if the allocated objects
+ * are not much smaller than PAGE_SIZE.
+ */
+
+struct chain_allocator {
+ struct linked_page *chain; /* the chain */
+ unsigned int used_space; /* total size of objects allocated out
+ * of the current page
+ */
+ gfp_t gfp_mask; /* mask for allocating pages */
+ int safe_needed; /* if set, only "safe" pages are allocated */
+};
+
+static void
+chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
{
- struct zone *zone;
- unsigned long zone_pfn;
- struct pbe *pbe, *p;
+ ca->chain = NULL;
+ ca->used_space = LINKED_PAGE_DATA_SIZE;
+ ca->gfp_mask = gfp_mask;
+ ca->safe_needed = safe_needed;
+}
- pbe = pblist;
- for_each_zone (zone) {
- if (is_highmem(zone))
- continue;
- mark_free_pages(zone);
- /* This is necessary for swsusp_free() */
- for_each_pb_page (p, pblist)
- SetPageNosaveFree(virt_to_page(p));
- for_each_pbe (p, pblist)
- SetPageNosaveFree(virt_to_page(p->address));
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
- if (saveable(zone, &zone_pfn)) {
- struct page *page;
- long *src, *dst;
- int n;
-
- page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
- BUG_ON(!pbe);
- pbe->orig_address = (unsigned long)page_address(page);
- /* copy_page and memcpy are not usable for copying task structs. */
- dst = (long *)pbe->address;
- src = (long *)pbe->orig_address;
- for (n = PAGE_SIZE / sizeof(long); n; n--)
- *dst++ = *src++;
- pbe = pbe->next;
- }
- }
+static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
+{
+ void *ret;
+
+ if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
+ struct linked_page *lp;
+
+ lp = alloc_image_page(ca->gfp_mask, ca->safe_needed);
+ if (!lp)
+ return NULL;
+
+ lp->next = ca->chain;
+ ca->chain = lp;
+ ca->used_space = 0;
}
- BUG_ON(pbe);
+ ret = ca->chain->data + ca->used_space;
+ ca->used_space += size;
+ return ret;
}
+static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
+{
+ free_list_of_pages(ca->chain, clear_page_nosave);
+ memset(ca, 0, sizeof(struct chain_allocator));
+}
/**
- * free_pagedir - free pages allocated with alloc_pagedir()
+ * Data types related to memory bitmaps.
+ *
+ * Memory bitmap is a structure consisting of many linked lists of
+ * objects. The main list's elements are of type struct zone_bitmap
+ * and each of them corresponds to one zone. For each zone bitmap
+ * object there is a list of objects of type struct bm_block that
+ * represent the blocks of bit chunks in which information is
+ * stored.
+ *
+ * struct memory_bitmap contains a pointer to the main list of zone
+ * bitmap objects, a struct bm_position used for browsing the bitmap,
+ * and a pointer to the list of pages used for allocating all of the
+ * zone bitmap objects and bitmap block objects.
+ *
+ * NOTE: It has to be possible to lay out the bitmap in memory
+ * using only allocations of order 0. Additionally, the bitmap is
+ * designed to work with arbitrary number of zones (this is over the
+ * top for now, but let's avoid making unnecessary assumptions ;-).
+ *
+ * struct zone_bitmap contains a pointer to a list of bitmap block
+ * objects and a pointer to the bitmap block object that has been
+ * most recently used for setting bits. Additionally, it contains the
+ * pfns that correspond to the start and end of the represented zone.
+ *
+ * struct bm_block contains a pointer to the memory page in which
+ * information is stored (in the form of a block of bit chunks
+ * of type unsigned long each). It also contains the pfns that
+ * correspond to the start and end of the represented memory area and
+ * the number of bit chunks in the block.
+ *
+ * NOTE: Memory bitmaps are used for two types of operations only:
+ * "set a bit" and "find the next bit set". Moreover, the searching
+ * is always carried out after all of the "set a bit" operations
+ * on given bitmap.
*/
-static void free_pagedir(struct pbe *pblist, int clear_nosave_free)
+#define BM_END_OF_MAP (~0UL)
+
+#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long))
+#define BM_BITS_PER_CHUNK (sizeof(long) << 3)
+#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
+
+struct bm_block {
+ struct bm_block *next; /* next element of the list */
+ unsigned long start_pfn; /* pfn represented by the first bit */
+ unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
+ unsigned int size; /* number of bit chunks */
+ unsigned long *data; /* chunks of bits representing pages */
+};
+
+struct zone_bitmap {
+ struct zone_bitmap *next; /* next element of the list */
+ unsigned long start_pfn; /* minimal pfn in this zone */
+ unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
+ struct bm_block *bm_blocks; /* list of bitmap blocks */
+ struct bm_block *cur_block; /* recently used bitmap block */
+};
+
+/* struct bm_position is used for browsing memory bitmaps */
+
+struct bm_position {
+ struct zone_bitmap *zone_bm;
+ struct bm_block *block;
+ int chunk;
+ int bit;
+};
+
+struct memory_bitmap {
+ struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */
+ struct linked_page *p_list; /* list of pages used to store zone
+ * bitmap objects and bitmap block
+ * objects
+ */
+ struct bm_position cur; /* most recently used bit position */
+};
+
+/* Functions that operate on memory bitmaps */
+
+static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
{
- struct pbe *pbe;
+ bm->cur.chunk = 0;
+ bm->cur.bit = -1;
+}
- while (pblist) {
- pbe = (pblist + PB_PAGE_SKIP)->next;
- ClearPageNosave(virt_to_page(pblist));
- if (clear_nosave_free)
- ClearPageNosaveFree(virt_to_page(pblist));
- free_page((unsigned long)pblist);
- pblist = pbe;
- }
+static void memory_bm_position_reset(struct memory_bitmap *bm)
+{
+ struct zone_bitmap *zone_bm;
+
+ zone_bm = bm->zone_bm_list;
+ bm->cur.zone_bm = zone_bm;
+ bm->cur.block = zone_bm->bm_blocks;
+ memory_bm_reset_chunk(bm);
}
+static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+
/**
- * fill_pb_page - Create a list of PBEs on a given memory page
+ * create_bm_block_list - create a list of block bitmap objects
*/
-static inline void fill_pb_page(struct pbe *pbpage)
+static inline struct bm_block *
+create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
{
- struct pbe *p;
+ struct bm_block *bblist = NULL;
+
+ while (nr_blocks-- > 0) {
+ struct bm_block *bb;
- p = pbpage;
- pbpage += PB_PAGE_SKIP;
- do
- p->next = p + 1;
- while (++p < pbpage);
+ bb = chain_alloc(ca, sizeof(struct bm_block));
+ if (!bb)
+ return NULL;
+
+ bb->next = bblist;
+ bblist = bb;
+ }
+ return bblist;
}
/**
- * create_pbe_list - Create a list of PBEs on top of a given chain
- * of memory pages allocated with alloc_pagedir()
+ * create_zone_bm_list - create a list of zone bitmap objects
*/
-static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
+static inline struct zone_bitmap *
+create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
{
- struct pbe *pbpage, *p;
- unsigned int num = PBES_PER_PAGE;
+ struct zone_bitmap *zbmlist = NULL;
- for_each_pb_page (pbpage, pblist) {
- if (num >= nr_pages)
- break;
+ while (nr_zones-- > 0) {
+ struct zone_bitmap *zbm;
+
+ zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
+ if (!zbm)
+ return NULL;
+
+ zbm->next = zbmlist;
+ zbmlist = zbm;
+ }
+ return zbmlist;
+}
+
+/**
+ * memory_bm_create - allocate memory for a memory bitmap
+ */
+
+static int
+memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
+{
+ struct chain_allocator ca;
+ struct zone *zone;
+ struct zone_bitmap *zone_bm;
+ struct bm_block *bb;
+ unsigned int nr;
+
+ chain_init(&ca, gfp_mask, safe_needed);
- fill_pb_page(pbpage);
- num += PBES_PER_PAGE;
+ /* Compute the number of zones */
+ nr = 0;
+ for_each_zone (zone)
+ if (populated_zone(zone) && !is_highmem(zone))
+ nr++;
+
+ /* Allocate the list of zones bitmap objects */
+ zone_bm = create_zone_bm_list(nr, &ca);
+ bm->zone_bm_list = zone_bm;
+ if (!zone_bm) {
+ chain_free(&ca, PG_UNSAFE_CLEAR);
+ return -ENOMEM;
}
- if (pbpage) {
- for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
- p->next = p + 1;
- p->next = NULL;
+
+ /* Initialize the zone bitmap objects */
+ for_each_zone (zone) {
+ unsigned long pfn;
+
+ if (!populated_zone(zone) || is_highmem(zone))
+ continue;
+
+ zone_bm->start_pfn = zone->zone_start_pfn;
+ zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ /* Allocate the list of bitmap block objects */
+ nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
+ bb = create_bm_block_list(nr, &ca);
+ zone_bm->bm_blocks = bb;
+ zone_bm->cur_block = bb;
+ if (!bb)
+ goto Free;
+
+ nr = zone->spanned_pages;
+ pfn = zone->zone_start_pfn;
+ /* Initialize the bitmap block objects */
+ while (bb) {
+ unsigned long *ptr;
+
+ ptr = alloc_image_page(gfp_mask, safe_needed);
+ bb->data = ptr;
+ if (!ptr)
+ goto Free;
+
+ bb->start_pfn = pfn;
+ if (nr >= BM_BITS_PER_BLOCK) {
+ pfn += BM_BITS_PER_BLOCK;
+ bb->size = BM_CHUNKS_PER_BLOCK;
+ nr -= BM_BITS_PER_BLOCK;
+ } else {
+ /* This is executed only once in the loop */
+ pfn += nr;
+ bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
+ }
+ bb->end_pfn = pfn;
+ bb = bb->next;
+ }
+ zone_bm = zone_bm->next;
}
+ bm->p_list = ca.chain;
+ memory_bm_position_reset(bm);
+ return 0;
+
+Free:
+ bm->p_list = ca.chain;
+ memory_bm_free(bm, PG_UNSAFE_CLEAR);
+ return -ENOMEM;
}
-static unsigned int unsafe_pages;
+/**
+ * memory_bm_free - free memory occupied by the memory bitmap @bm
+ */
+
+static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
+{
+ struct zone_bitmap *zone_bm;
+
+ /* Free the list of bit blocks for each zone_bitmap object */
+ zone_bm = bm->zone_bm_list;
+ while (zone_bm) {
+ struct bm_block *bb;
+
+ bb = zone_bm->bm_blocks;
+ while (bb) {
+ if (bb->data)
+ free_image_page(bb->data, clear_nosave_free);
+ bb = bb->next;
+ }
+ zone_bm = zone_bm->next;
+ }
+ free_list_of_pages(bm->p_list, clear_nosave_free);
+ bm->zone_bm_list = NULL;
+}
/**
- * @safe_needed - on resume, for storing the PBE list and the image,
- * we can only use memory pages that do not conflict with the pages
- * used before suspend.
+ * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
+ * to given pfn. The cur.zone_bm member of @bm and the cur_block member
+ * of @bm->cur.zone_bm are updated.
*
- * The unsafe pages are marked with the PG_nosave_free flag
- * and we count them using unsafe_pages
+ * If the bit cannot be set, the function returns -EINVAL .
*/
-static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
+static int
+memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
- void *res;
-
- res = (void *)get_zeroed_page(gfp_mask);
- if (safe_needed)
- while (res && PageNosaveFree(virt_to_page(res))) {
- /* The page is unsafe, mark it for swsusp_free() */
- SetPageNosave(virt_to_page(res));
- unsafe_pages++;
- res = (void *)get_zeroed_page(gfp_mask);
+ struct zone_bitmap *zone_bm;
+ struct bm_block *bb;
+
+ /* Check if the pfn is from the current zone */
+ zone_bm = bm->cur.zone_bm;
+ if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
+ zone_bm = bm->zone_bm_list;
+ /* We don't assume that the zones are sorted by pfns */
+ while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
+ zone_bm = zone_bm->next;
+ if (unlikely(!zone_bm))
+ return -EINVAL;
}
- if (res) {
- SetPageNosave(virt_to_page(res));
- SetPageNosaveFree(virt_to_page(res));
+ bm->cur.zone_bm = zone_bm;
}
- return res;
+ /* Check if the pfn corresponds to the current bitmap block */
+ bb = zone_bm->cur_block;
+ if (pfn < bb->start_pfn)
+ bb = zone_bm->bm_blocks;
+
+ while (pfn >= bb->end_pfn) {
+ bb = bb->next;
+ if (unlikely(!bb))
+ return -EINVAL;
+ }
+ zone_bm->cur_block = bb;
+ pfn -= bb->start_pfn;
+ set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
+ return 0;
}
-unsigned long get_safe_page(gfp_t gfp_mask)
+/* Two auxiliary functions for memory_bm_next_pfn */
+
+/* Find the next set bit after position @bit in the given chunk, if there is one */
+
+static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
{
- return (unsigned long)alloc_image_page(gfp_mask, 1);
+ bit++;
+ while (bit < BM_BITS_PER_CHUNK) {
+ if (test_bit(bit, chunk_p))
+ return bit;
+
+ bit++;
+ }
+ return -1;
+}
+
+/* Find a chunk containing some bits set in given block of bits */
+
+static inline int next_chunk_in_block(int n, struct bm_block *bb)
+{
+ n++;
+ while (n < bb->size) {
+ if (bb->data[n])
+ return n;
+
+ n++;
+ }
+ return -1;
}
/**
- * alloc_pagedir - Allocate the page directory.
- *
- * First, determine exactly how many pages we need and
- * allocate them.
+ * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
+ * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
+ * returned.
*
- * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
- * struct pbe elements (pbes) and the last element in the page points
- * to the next page.
+ * It is required to run memory_bm_position_reset() before the first call to
+ * this function.
+ */
+
+static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
+{
+ struct zone_bitmap *zone_bm;
+ struct bm_block *bb;
+ int chunk;
+ int bit;
+
+ do {
+ bb = bm->cur.block;
+ do {
+ chunk = bm->cur.chunk;
+ bit = bm->cur.bit;
+ do {
+ bit = next_bit_in_chunk(bit, bb->data + chunk);
+ if (bit >= 0)
+ goto Return_pfn;
+
+ chunk = next_chunk_in_block(chunk, bb);
+ bit = -1;
+ } while (chunk >= 0);
+ bb = bb->next;
+ bm->cur.block = bb;
+ memory_bm_reset_chunk(bm);
+ } while (bb);
+ zone_bm = bm->cur.zone_bm->next;
+ if (zone_bm) {
+ bm->cur.zone_bm = zone_bm;
+ bm->cur.block = zone_bm->bm_blocks;
+ memory_bm_reset_chunk(bm);
+ }
+ } while (zone_bm);
+ memory_bm_position_reset(bm);
+ return BM_END_OF_MAP;
+
+Return_pfn:
+ bm->cur.chunk = chunk;
+ bm->cur.bit = bit;
+ return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
+}
+
+/**
+ * snapshot_additional_pages - estimate the number of additional pages
+ * that will be needed for setting up the suspend image data structures for
+ * the given zone (usually the returned value is greater than the exact number)
+ */
+
+unsigned int snapshot_additional_pages(struct zone *zone)
+{
+ unsigned int res;
+
+ res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
+ res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
+ return res;
+}
+
+/**
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+static inline int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+/**
+ * saveable_page - Determine whether a page should be cloned or not.
+ * @pfn: The page frame number of the page to check
*
- * On each page we set up a list of struct_pbe elements.
+ * We save a page if it isn't Nosave, and is not in the range of pages
+ * statically defined as 'unsaveable', and it
+ * isn't a part of a free chunk of pages.
*/
-static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
- int safe_needed)
+static struct page *saveable_page(unsigned long pfn)
{
- unsigned int num;
- struct pbe *pblist, *pbe;
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ return NULL;
- if (!nr_pages)
+ page = pfn_to_page(pfn);
+
+ if (PageNosave(page))
+ return NULL;
+ if (PageReserved(page) && pfn_is_nosave(pfn))
return NULL;
+ if (PageNosaveFree(page))
+ return NULL;
+
+ return page;
+}
+
+unsigned int count_data_pages(void)
+{
+ struct zone *zone;
+ unsigned long pfn, max_zone_pfn;
+ unsigned int n = 0;
- pblist = alloc_image_page(gfp_mask, safe_needed);
- /* FIXME: rewrite this ugly loop */
- for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
- pbe = pbe->next, num += PBES_PER_PAGE) {
- pbe += PB_PAGE_SKIP;
- pbe->next = alloc_image_page(gfp_mask, safe_needed);
+ for_each_zone (zone) {
+ if (is_highmem(zone))
+ continue;
+ mark_free_pages(zone);
+ max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ n += !!saveable_page(pfn);
}
- if (!pbe) { /* get_zeroed_page() failed */
- free_pagedir(pblist, 1);
- pblist = NULL;
- } else
- create_pbe_list(pblist, nr_pages);
- return pblist;
+ return n;
+}
+
+static inline void copy_data_page(long *dst, long *src)
+{
+ int n;
+
+ /* copy_page and memcpy are not usable for copying task structs. */
+ for (n = PAGE_SIZE / sizeof(long); n; n--)
+ *dst++ = *src++;
+}
+
+static void
+copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
+{
+ struct zone *zone;
+ unsigned long pfn;
+
+ for_each_zone (zone) {
+ unsigned long max_zone_pfn;
+
+ if (is_highmem(zone))
+ continue;
+
+ mark_free_pages(zone);
+ max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (saveable_page(pfn))
+ memory_bm_set_bit(orig_bm, pfn);
+ }
+ memory_bm_position_reset(orig_bm);
+ memory_bm_position_reset(copy_bm);
+ do {
+ pfn = memory_bm_next_pfn(orig_bm);
+ if (likely(pfn != BM_END_OF_MAP)) {
+ struct page *page;
+ void *src;
+
+ page = pfn_to_page(pfn);
+ src = page_address(page);
+ page = pfn_to_page(memory_bm_next_pfn(copy_bm));
+ copy_data_page(page_address(page), src);
+ }
+ } while (pfn != BM_END_OF_MAP);
}
/**
- * Free pages we allocated for suspend. Suspend pages are alocated
- * before atomic copy, so we need to free them after resume.
+ * swsusp_free - free pages allocated for the suspend.
+ *
+ * Suspend pages are allocated before the atomic copy is made, so we
+ * need to release them after the resume.
*/
void swsusp_free(void)
{
struct zone *zone;
- unsigned long zone_pfn;
+ unsigned long pfn, max_zone_pfn;
for_each_zone(zone) {
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
- if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
- struct page *page;
- page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
+ max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (pfn_valid(pfn)) {
+ struct page *page = pfn_to_page(pfn);
+
if (PageNosave(page) && PageNosaveFree(page)) {
ClearPageNosave(page);
ClearPageNosaveFree(page);
@@ -399,7 +798,7 @@ void swsusp_free(void)
}
nr_copy_pages = 0;
nr_meta_pages = 0;
- pagedir_nosave = NULL;
+ restore_pblist = NULL;
buffer = NULL;
}
@@ -414,46 +813,57 @@ void swsusp_free(void)
static int enough_free_mem(unsigned int nr_pages)
{
struct zone *zone;
- unsigned int n = 0;
+ unsigned int free = 0, meta = 0;
for_each_zone (zone)
- if (!is_highmem(zone))
- n += zone->free_pages;
- pr_debug("swsusp: available memory: %u pages\n", n);
- return n > (nr_pages + PAGES_FOR_IO +
- (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
-}
+ if (!is_highmem(zone)) {
+ free += zone->free_pages;
+ meta += snapshot_additional_pages(zone);
+ }
-static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed)
-{
- struct pbe *p;
+ pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n",
+ nr_pages, PAGES_FOR_IO, meta, free);
- for_each_pbe (p, pblist) {
- p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed);
- if (!p->address)
- return -ENOMEM;
- }
- return 0;
+ return free > nr_pages + PAGES_FOR_IO + meta;
}
-static struct pbe *swsusp_alloc(unsigned int nr_pages)
+static int
+swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
+ unsigned int nr_pages)
{
- struct pbe *pblist;
+ int error;
- if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) {
- printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
- return NULL;
- }
+ error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
+ if (error)
+ goto Free;
- if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) {
- printk(KERN_ERR "suspend: Allocating image pages failed.\n");
- swsusp_free();
- return NULL;
+ error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
+ if (error)
+ goto Free;
+
+ while (nr_pages-- > 0) {
+ struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ if (!page)
+ goto Free;
+
+ SetPageNosave(page);
+ SetPageNosaveFree(page);
+ memory_bm_set_bit(copy_bm, page_to_pfn(page));
}
+ return 0;
- return pblist;
+Free:
+ swsusp_free();
+ return -ENOMEM;
}
+/* Memory bitmap used for marking saveable pages */
+static struct memory_bitmap orig_bm;
+/* Memory bitmap used for marking allocated pages that will contain the copies
+ * of saveable pages
+ */
+static struct memory_bitmap copy_bm;
+
asmlinkage int swsusp_save(void)
{
unsigned int nr_pages;
@@ -464,25 +874,19 @@ asmlinkage int swsusp_save(void)
nr_pages = count_data_pages();
printk("swsusp: Need to copy %u pages\n", nr_pages);
- pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
- nr_pages,
- (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
- PAGES_FOR_IO, nr_free_pages());
-
if (!enough_free_mem(nr_pages)) {
printk(KERN_ERR "swsusp: Not enough free memory\n");
return -ENOMEM;
}
- pagedir_nosave = swsusp_alloc(nr_pages);
- if (!pagedir_nosave)
+ if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages))
return -ENOMEM;
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
drain_local_pages();
- copy_data_pages(pagedir_nosave);
+ copy_data_pages(&copy_bm, &orig_bm);
/*
* End of critical section. From now on, we can write to memory,
@@ -511,22 +915,20 @@ static void init_header(struct swsusp_info *info)
}
/**
- * pack_orig_addresses - the .orig_address fields of the PBEs from the
- * list starting at @pbe are stored in the array @buf[] (1 page)
+ * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
+ * are stored in the array @buf[] (1 page at a time)
*/
-static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe)
+static inline void
+pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
int j;
- for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
- buf[j] = pbe->orig_address;
- pbe = pbe->next;
+ for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
+ buf[j] = memory_bm_next_pfn(bm);
+ if (unlikely(buf[j] == BM_END_OF_MAP))
+ break;
}
- if (!pbe)
- for (; j < PAGE_SIZE / sizeof(long); j++)
- buf[j] = 0;
- return pbe;
}
/**
@@ -553,37 +955,39 @@ static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pb
int snapshot_read_next(struct snapshot_handle *handle, size_t count)
{
- if (handle->page > nr_meta_pages + nr_copy_pages)
+ if (handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
+
if (!buffer) {
/* This makes the buffer be freed by swsusp_free() */
- buffer = alloc_image_page(GFP_ATOMIC, 0);
+ buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
if (!buffer)
return -ENOMEM;
}
if (!handle->offset) {
init_header((struct swsusp_info *)buffer);
handle->buffer = buffer;
- handle->pbe = pagedir_nosave;
+ memory_bm_position_reset(&orig_bm);
+ memory_bm_position_reset(&copy_bm);
}
- if (handle->prev < handle->page) {
- if (handle->page <= nr_meta_pages) {
- handle->pbe = pack_orig_addresses(buffer, handle->pbe);
- if (!handle->pbe)
- handle->pbe = pagedir_nosave;
+ if (handle->prev < handle->cur) {
+ if (handle->cur <= nr_meta_pages) {
+ memset(buffer, 0, PAGE_SIZE);
+ pack_pfns(buffer, &orig_bm);
} else {
- handle->buffer = (void *)handle->pbe->address;
- handle->pbe = handle->pbe->next;
+ unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+
+ handle->buffer = page_address(pfn_to_page(pfn));
}
- handle->prev = handle->page;
+ handle->prev = handle->cur;
}
- handle->buf_offset = handle->page_offset;
- if (handle->page_offset + count >= PAGE_SIZE) {
- count = PAGE_SIZE - handle->page_offset;
- handle->page_offset = 0;
- handle->page++;
+ handle->buf_offset = handle->cur_offset;
+ if (handle->cur_offset + count >= PAGE_SIZE) {
+ count = PAGE_SIZE - handle->cur_offset;
+ handle->cur_offset = 0;
+ handle->cur++;
} else {
- handle->page_offset += count;
+ handle->cur_offset += count;
}
handle->offset += count;
return count;
@@ -595,47 +999,50 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
* had been used before suspend
*/
-static int mark_unsafe_pages(struct pbe *pblist)
+static int mark_unsafe_pages(struct memory_bitmap *bm)
{
struct zone *zone;
- unsigned long zone_pfn;
- struct pbe *p;
-
- if (!pblist) /* a sanity check */
- return -EINVAL;
+ unsigned long pfn, max_zone_pfn;
/* Clear page flags */
for_each_zone (zone) {
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
- if (pfn_valid(zone_pfn + zone->zone_start_pfn))
- ClearPageNosaveFree(pfn_to_page(zone_pfn +
- zone->zone_start_pfn));
+ max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (pfn_valid(pfn))
+ ClearPageNosaveFree(pfn_to_page(pfn));
}
- /* Mark orig addresses */
- for_each_pbe (p, pblist) {
- if (virt_addr_valid(p->orig_address))
- SetPageNosaveFree(virt_to_page(p->orig_address));
- else
- return -EFAULT;
- }
+ /* Mark pages that correspond to the "original" pfns as "unsafe" */
+ memory_bm_position_reset(bm);
+ do {
+ pfn = memory_bm_next_pfn(bm);
+ if (likely(pfn != BM_END_OF_MAP)) {
+ if (likely(pfn_valid(pfn)))
+ SetPageNosaveFree(pfn_to_page(pfn));
+ else
+ return -EFAULT;
+ }
+ } while (pfn != BM_END_OF_MAP);
- unsafe_pages = 0;
+ allocated_unsafe_pages = 0;
return 0;
}
-static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
+static void
+duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
{
- /* We assume both lists contain the same number of elements */
- while (src) {
- dst->orig_address = src->orig_address;
- dst = dst->next;
- src = src->next;
+ unsigned long pfn;
+
+ memory_bm_position_reset(src);
+ pfn = memory_bm_next_pfn(src);
+ while (pfn != BM_END_OF_MAP) {
+ memory_bm_set_bit(dst, pfn);
+ pfn = memory_bm_next_pfn(src);
}
}
-static int check_header(struct swsusp_info *info)
+static inline int check_header(struct swsusp_info *info)
{
char *reason = NULL;
@@ -662,19 +1069,14 @@ static int check_header(struct swsusp_info *info)
* load header - check the image header and copy data from it
*/
-static int load_header(struct snapshot_handle *handle,
- struct swsusp_info *info)
+static int
+load_header(struct swsusp_info *info)
{
int error;
- struct pbe *pblist;
+ restore_pblist = NULL;
error = check_header(info);
if (!error) {
- pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0);
- if (!pblist)
- return -ENOMEM;
- pagedir_nosave = pblist;
- handle->pbe = pblist;
nr_copy_pages = info->image_pages;
nr_meta_pages = info->pages - info->image_pages - 1;
}
@@ -682,113 +1084,137 @@ static int load_header(struct snapshot_handle *handle,
}
/**
- * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
- * the PBEs in the list starting at @pbe
+ * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
+ * the corresponding bit in the memory bitmap @bm
*/
-static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
- struct pbe *pbe)
+static inline void
+unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
int j;
- for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
- pbe->orig_address = buf[j];
- pbe = pbe->next;
+ for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
+ if (unlikely(buf[j] == BM_END_OF_MAP))
+ break;
+
+ memory_bm_set_bit(bm, buf[j]);
}
- return pbe;
}
/**
- * prepare_image - use metadata contained in the PBE list
- * pointed to by pagedir_nosave to mark the pages that will
- * be overwritten in the process of restoring the system
- * memory state from the image ("unsafe" pages) and allocate
- * memory for the image
+ * prepare_image - use the memory bitmap @bm to mark the pages that will
+ * be overwritten in the process of restoring the system memory state
+ * from the suspend image ("unsafe" pages) and allocate memory for the
+ * image.
*
- * The idea is to allocate the PBE list first and then
- * allocate as many pages as it's needed for the image data,
- * but not to assign these pages to the PBEs initially.
- * Instead, we just mark them as allocated and create a list
- * of "safe" which will be used later
+ * The idea is to allocate a new memory bitmap first and then allocate
+ * as many pages as needed for the image data, but not to assign these
+ * pages to specific tasks initially. Instead, we just mark them as
+ * allocated and create a list of "safe" pages that will be used later.
*/
-struct safe_page {
- struct safe_page *next;
- char padding[PAGE_SIZE - sizeof(void *)];
-};
+#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
-static struct safe_page *safe_pages;
+static struct linked_page *safe_pages_list;
-static int prepare_image(struct snapshot_handle *handle)
+static int
+prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
{
- int error = 0;
- unsigned int nr_pages = nr_copy_pages;
- struct pbe *p, *pblist = NULL;
+ unsigned int nr_pages;
+ struct linked_page *sp_list, *lp;
+ int error;
- p = pagedir_nosave;
- error = mark_unsafe_pages(p);
- if (!error) {
- pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
- if (pblist)
- copy_page_backup_list(pblist, p);
- free_pagedir(p, 0);
- if (!pblist)
+ error = mark_unsafe_pages(bm);
+ if (error)
+ goto Free;
+
+ error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
+ if (error)
+ goto Free;
+
+ duplicate_memory_bitmap(new_bm, bm);
+ memory_bm_free(bm, PG_UNSAFE_KEEP);
+ /* Reserve some safe pages for potential later use.
+ *
+ * NOTE: This way we make sure there will be enough safe pages for the
+ * chain_alloc() in get_buffer(). It is a bit wasteful, but
+ * nr_copy_pages cannot be greater than 50% of the memory anyway.
+ */
+ sp_list = NULL;
+ /* nr_copy_pages cannot be less than allocated_unsafe_pages */
+ nr_pages = nr_copy_pages - allocated_unsafe_pages;
+ nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
+ while (nr_pages > 0) {
+ lp = alloc_image_page(GFP_ATOMIC, PG_SAFE);
+ if (!lp) {
error = -ENOMEM;
+ goto Free;
+ }
+ lp->next = sp_list;
+ sp_list = lp;
+ nr_pages--;
}
- safe_pages = NULL;
- if (!error && nr_pages > unsafe_pages) {
- nr_pages -= unsafe_pages;
- while (nr_pages--) {
- struct safe_page *ptr;
-
- ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
- if (!ptr) {
- error = -ENOMEM;
- break;
- }
- if (!PageNosaveFree(virt_to_page(ptr))) {
- /* The page is "safe", add it to the list */
- ptr->next = safe_pages;
- safe_pages = ptr;
- }
- /* Mark the page as allocated */
- SetPageNosave(virt_to_page(ptr));
- SetPageNosaveFree(virt_to_page(ptr));
+ /* Preallocate memory for the image */
+ safe_pages_list = NULL;
+ nr_pages = nr_copy_pages - allocated_unsafe_pages;
+ while (nr_pages > 0) {
+ lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
+ if (!lp) {
+ error = -ENOMEM;
+ goto Free;
+ }
+ if (!PageNosaveFree(virt_to_page(lp))) {
+ /* The page is "safe", add it to the list */
+ lp->next = safe_pages_list;
+ safe_pages_list = lp;
}
+ /* Mark the page as allocated */
+ SetPageNosave(virt_to_page(lp));
+ SetPageNosaveFree(virt_to_page(lp));
+ nr_pages--;
}
- if (!error) {
- pagedir_nosave = pblist;
- } else {
- handle->pbe = NULL;
- swsusp_free();
+ /* Free the reserved safe pages so that chain_alloc() can use them */
+ while (sp_list) {
+ lp = sp_list->next;
+ free_image_page(sp_list, PG_UNSAFE_CLEAR);
+ sp_list = lp;
}
+ return 0;
+
+Free:
+ swsusp_free();
return error;
}
-static void *get_buffer(struct snapshot_handle *handle)
+/**
+ * get_buffer - compute the address that snapshot_write_next() should
+ * set for its caller to write to.
+ */
+
+static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
{
- struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
- struct page *page = virt_to_page(pbe->orig_address);
+ struct pbe *pbe;
+ struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
- if (PageNosave(page) && PageNosaveFree(page)) {
- /*
- * We have allocated the "original" page frame and we can
- * use it directly to store the read page
+ if (PageNosave(page) && PageNosaveFree(page))
+ /* We have allocated the "original" page frame and we can
+ * use it directly to store the loaded page.
*/
- pbe->address = 0;
- if (last && last->next)
- last->next = NULL;
- return (void *)pbe->orig_address;
- }
- /*
- * The "original" page frame has not been allocated and we have to
- * use a "safe" page frame to store the read page
+ return page_address(page);
+
+ /* The "original" page frame has not been allocated and we have to
+ * use a "safe" page frame to store the loaded page.
*/
- pbe->address = (unsigned long)safe_pages;
- safe_pages = safe_pages->next;
- if (last)
- last->next = pbe;
- handle->last_pbe = pbe;
+ pbe = chain_alloc(ca, sizeof(struct pbe));
+ if (!pbe) {
+ swsusp_free();
+ return NULL;
+ }
+ pbe->orig_address = (unsigned long)page_address(page);
+ pbe->address = (unsigned long)safe_pages_list;
+ safe_pages_list = safe_pages_list->next;
+ pbe->next = restore_pblist;
+ restore_pblist = pbe;
return (void *)pbe->address;
}
@@ -816,46 +1242,60 @@ static void *get_buffer(struct snapshot_handle *handle)
int snapshot_write_next(struct snapshot_handle *handle, size_t count)
{
+ static struct chain_allocator ca;
int error = 0;
- if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages)
+ /* Check if we have already loaded the entire image */
+ if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
+
if (!buffer) {
/* This makes the buffer be freed by swsusp_free() */
- buffer = alloc_image_page(GFP_ATOMIC, 0);
+ buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
if (!buffer)
return -ENOMEM;
}
if (!handle->offset)
handle->buffer = buffer;
- if (handle->prev < handle->page) {
- if (!handle->prev) {
- error = load_header(handle, (struct swsusp_info *)buffer);
+ handle->sync_read = 1;
+ if (handle->prev < handle->cur) {
+ if (handle->prev == 0) {
+ error = load_header(buffer);
if (error)
return error;
+
+ error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
+ if (error)
+ return error;
+
} else if (handle->prev <= nr_meta_pages) {
- handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
- if (!handle->pbe) {
- error = prepare_image(handle);
+ unpack_orig_pfns(buffer, &copy_bm);
+ if (handle->prev == nr_meta_pages) {
+ error = prepare_image(&orig_bm, &copy_bm);
if (error)
return error;
- handle->pbe = pagedir_nosave;
- handle->last_pbe = NULL;
- handle->buffer = get_buffer(handle);
+
+ chain_init(&ca, GFP_ATOMIC, PG_SAFE);
+ memory_bm_position_reset(&orig_bm);
+ restore_pblist = NULL;
+ handle->buffer = get_buffer(&orig_bm, &ca);
+ handle->sync_read = 0;
+ if (!handle->buffer)
+ return -ENOMEM;
}
} else {
- handle->pbe = handle->pbe->next;
- handle->buffer = get_buffer(handle);
+ handle->buffer = get_buffer(&orig_bm, &ca);
+ handle->sync_read = 0;
}
- handle->prev = handle->page;
+ handle->prev = handle->cur;
}
- handle->buf_offset = handle->page_offset;
- if (handle->page_offset + count >= PAGE_SIZE) {
- count = PAGE_SIZE - handle->page_offset;
- handle->page_offset = 0;
- handle->page++;
+ handle->buf_offset = handle->cur_offset;
+ if (handle->cur_offset + count >= PAGE_SIZE) {
+ count = PAGE_SIZE - handle->cur_offset;
+ handle->cur_offset = 0;
+ handle->cur++;
} else {
- handle->page_offset += count;
+ handle->cur_offset += count;
}
handle->offset += count;
return count;
@@ -863,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
int snapshot_image_loaded(struct snapshot_handle *handle)
{
- return !(!handle->pbe || handle->pbe->next || !nr_copy_pages ||
- handle->page <= nr_meta_pages + nr_copy_pages);
+ return !(!nr_copy_pages ||
+ handle->cur <= nr_meta_pages + nr_copy_pages);
+}
+
+void snapshot_free_unused_memory(struct snapshot_handle *handle)
+{
+ /* Free only if we have loaded the image entirely */
+ if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
+ memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f1dd146bd64d..9b2ee5344dee 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -22,6 +22,7 @@
#include <linux/device.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
@@ -49,18 +50,16 @@ static int mark_swapfiles(swp_entry_t start)
{
int error;
- rw_swap_page_sync(READ,
- swp_entry(root_swap, 0),
- virt_to_page((unsigned long)&swsusp_header));
+ rw_swap_page_sync(READ, swp_entry(root_swap, 0),
+ virt_to_page((unsigned long)&swsusp_header), NULL);
if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
swsusp_header.image = start;
- error = rw_swap_page_sync(WRITE,
- swp_entry(root_swap, 0),
- virt_to_page((unsigned long)
- &swsusp_header));
+ error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0),
+ virt_to_page((unsigned long)&swsusp_header),
+ NULL);
} else {
pr_debug("swsusp: Partition is not swap space.\n");
error = -ENODEV;
@@ -88,16 +87,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */
* write_page - Write one page to given swap location.
* @buf: Address we're writing.
* @offset: Offset of the swap page we're writing to.
+ * @bio_chain: Link the next write BIO here
*/
-static int write_page(void *buf, unsigned long offset)
+static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
{
swp_entry_t entry;
int error = -ENOSPC;
if (offset) {
+ struct page *page = virt_to_page(buf);
+
+ if (bio_chain) {
+ /*
+ * Whether or not we successfully allocated a copy page,
+ * we take a ref on the page here. It gets undone in
+ * wait_on_bio_chain().
+ */
+ struct page *page_copy;
+ page_copy = alloc_page(GFP_ATOMIC);
+ if (page_copy == NULL) {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ get_page(page);
+ } else {
+ memcpy(page_address(page_copy),
+ page_address(page), PAGE_SIZE);
+ page = page_copy;
+ }
+ }
entry = swp_entry(root_swap, offset);
- error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf));
+ error = rw_swap_page_sync(WRITE, entry, page, bio_chain);
}
return error;
}
@@ -146,6 +166,26 @@ static void release_swap_writer(struct swap_map_handle *handle)
handle->bitmap = NULL;
}
+static void show_speed(struct timeval *start, struct timeval *stop,
+ unsigned nr_pages, char *msg)
+{
+ s64 elapsed_centisecs64;
+ int centisecs;
+ int k;
+ int kps;
+
+ elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+ do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
+ centisecs = elapsed_centisecs64;
+ if (centisecs == 0)
+ centisecs = 1; /* avoid div-by-zero */
+ k = nr_pages * (PAGE_SIZE / 1024);
+ kps = (k * 100) / centisecs;
+ printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
+ centisecs / 100, centisecs % 100,
+ kps / 1000, (kps % 1000) / 10);
+}
+
static int get_swap_writer(struct swap_map_handle *handle)
{
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
@@ -165,37 +205,70 @@ static int get_swap_writer(struct swap_map_handle *handle)
return 0;
}
-static int swap_write_page(struct swap_map_handle *handle, void *buf)
+static int wait_on_bio_chain(struct bio **bio_chain)
{
- int error;
+ struct bio *bio;
+ struct bio *next_bio;
+ int ret = 0;
+
+ if (bio_chain == NULL)
+ return 0;
+
+ bio = *bio_chain;
+ if (bio == NULL)
+ return 0;
+ while (bio) {
+ struct page *page;
+
+ next_bio = bio->bi_private;
+ page = bio->bi_io_vec[0].bv_page;
+ wait_on_page_locked(page);
+ if (!PageUptodate(page) || PageError(page))
+ ret = -EIO;
+ put_page(page);
+ bio_put(bio);
+ bio = next_bio;
+ }
+ *bio_chain = NULL;
+ return ret;
+}
+
+static int swap_write_page(struct swap_map_handle *handle, void *buf,
+ struct bio **bio_chain)
+{
+ int error = 0;
unsigned long offset;
if (!handle->cur)
return -EINVAL;
offset = alloc_swap_page(root_swap, handle->bitmap);
- error = write_page(buf, offset);
+ error = write_page(buf, offset, bio_chain);
if (error)
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
+ error = wait_on_bio_chain(bio_chain);
+ if (error)
+ goto out;
offset = alloc_swap_page(root_swap, handle->bitmap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap);
+ error = write_page(handle->cur, handle->cur_swap, NULL);
if (error)
- return error;
+ goto out;
memset(handle->cur, 0, PAGE_SIZE);
handle->cur_swap = offset;
handle->k = 0;
}
- return 0;
+out:
+ return error;
}
static int flush_swap_writer(struct swap_map_handle *handle)
{
if (handle->cur && handle->cur_swap)
- return write_page(handle->cur, handle->cur_swap);
+ return write_page(handle->cur, handle->cur_swap, NULL);
else
return -EINVAL;
}
@@ -206,21 +279,29 @@ static int flush_swap_writer(struct swap_map_handle *handle)
static int save_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
- unsigned int nr_pages)
+ unsigned int nr_to_write)
{
unsigned int m;
int ret;
int error = 0;
+ int nr_pages;
+ int err2;
+ struct bio *bio;
+ struct timeval start;
+ struct timeval stop;
- printk("Saving image data pages (%u pages) ... ", nr_pages);
- m = nr_pages / 100;
+ printk("Saving image data pages (%u pages) ... ", nr_to_write);
+ m = nr_to_write / 100;
if (!m)
m = 1;
nr_pages = 0;
+ bio = NULL;
+ do_gettimeofday(&start);
do {
ret = snapshot_read_next(snapshot, PAGE_SIZE);
if (ret > 0) {
- error = swap_write_page(handle, data_of(*snapshot));
+ error = swap_write_page(handle, data_of(*snapshot),
+ &bio);
if (error)
break;
if (!(nr_pages % m))
@@ -228,8 +309,13 @@ static int save_image(struct swap_map_handle *handle,
nr_pages++;
}
} while (ret > 0);
+ err2 = wait_on_bio_chain(&bio);
+ do_gettimeofday(&stop);
+ if (!error)
+ error = err2;
if (!error)
printk("\b\b\b\bdone\n");
+ show_speed(&start, &stop, nr_to_write, "Wrote");
return error;
}
@@ -245,8 +331,7 @@ static int enough_swap(unsigned int nr_pages)
unsigned int free_swap = count_swap_pages(root_swap, 1);
pr_debug("swsusp: free swap pages: %u\n", free_swap);
- return free_swap > (nr_pages + PAGES_FOR_IO +
- (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
+ return free_swap > nr_pages + PAGES_FOR_IO;
}
/**
@@ -266,7 +351,8 @@ int swsusp_write(void)
int error;
if ((error = swsusp_swap_check())) {
- printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
+ printk(KERN_ERR "swsusp: Cannot find swap device, try "
+ "swapon -a.\n");
return error;
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
@@ -281,7 +367,7 @@ int swsusp_write(void)
error = get_swap_writer(&handle);
if (!error) {
unsigned long start = handle.cur_swap;
- error = swap_write_page(&handle, header);
+ error = swap_write_page(&handle, header, NULL);
if (!error)
error = save_image(&handle, &snapshot,
header->pages - 1);
@@ -298,27 +384,6 @@ int swsusp_write(void)
return error;
}
-/*
- * Using bio to read from swap.
- * This code requires a bit more work than just using buffer heads
- * but, it is the recommended way for 2.5/2.6.
- * The following are to signal the beginning and end of I/O. Bios
- * finish asynchronously, while we want them to happen synchronously.
- * A simple atomic_t, and a wait loop take care of this problem.
- */
-
-static atomic_t io_done = ATOMIC_INIT(0);
-
-static int end_io(struct bio *bio, unsigned int num, int err)
-{
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- printk(KERN_ERR "I/O error reading swsusp image.\n");
- return -EIO;
- }
- atomic_set(&io_done, 0);
- return 0;
-}
-
static struct block_device *resume_bdev;
/**
@@ -326,15 +391,15 @@ static struct block_device *resume_bdev;
* @rw: READ or WRITE.
* @off physical offset of page.
* @page: page we're reading or writing.
+ * @bio_chain: list of pending bios (for async reading)
*
* Straight from the textbook - allocate and initialize the bio.
- * If we're writing, make sure the page is marked as dirty.
- * Then submit it and wait.
+ * If we're reading, make sure the page is marked as dirty.
+ * Then submit it and, if @bio_chain == NULL, wait.
*/
-
-static int submit(int rw, pgoff_t page_off, void *page)
+static int submit(int rw, pgoff_t page_off, struct page *page,
+ struct bio **bio_chain)
{
- int error = 0;
struct bio *bio;
bio = bio_alloc(GFP_ATOMIC, 1);
@@ -342,33 +407,40 @@ static int submit(int rw, pgoff_t page_off, void *page)
return -ENOMEM;
bio->bi_sector = page_off * (PAGE_SIZE >> 9);
bio->bi_bdev = resume_bdev;
- bio->bi_end_io = end_io;
+ bio->bi_end_io = end_swap_bio_read;
- if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
- printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
- error = -EFAULT;
- goto Done;
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
+ bio_put(bio);
+ return -EFAULT;
}
- atomic_set(&io_done, 1);
- submit_bio(rw | (1 << BIO_RW_SYNC), bio);
- while (atomic_read(&io_done))
- yield();
- if (rw == READ)
- bio_set_pages_dirty(bio);
- Done:
- bio_put(bio);
- return error;
+ lock_page(page);
+ bio_get(bio);
+
+ if (bio_chain == NULL) {
+ submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+ wait_on_page_locked(page);
+ if (rw == READ)
+ bio_set_pages_dirty(bio);
+ bio_put(bio);
+ } else {
+ get_page(page);
+ bio->bi_private = *bio_chain;
+ *bio_chain = bio;
+ submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+ }
+ return 0;
}
-static int bio_read_page(pgoff_t page_off, void *page)
+static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
{
- return submit(READ, page_off, page);
+ return submit(READ, page_off, virt_to_page(addr), bio_chain);
}
-static int bio_write_page(pgoff_t page_off, void *page)
+static int bio_write_page(pgoff_t page_off, void *addr)
{
- return submit(WRITE, page_off, page);
+ return submit(WRITE, page_off, virt_to_page(addr), NULL);
}
/**
@@ -393,7 +465,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
if (!handle->cur)
return -ENOMEM;
- error = bio_read_page(swp_offset(start), handle->cur);
+ error = bio_read_page(swp_offset(start), handle->cur, NULL);
if (error) {
release_swap_reader(handle);
return error;
@@ -402,7 +474,8 @@ static int get_swap_reader(struct swap_map_handle *handle,
return 0;
}
-static int swap_read_page(struct swap_map_handle *handle, void *buf)
+static int swap_read_page(struct swap_map_handle *handle, void *buf,
+ struct bio **bio_chain)
{
unsigned long offset;
int error;
@@ -412,16 +485,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
offset = handle->cur->entries[handle->k];
if (!offset)
return -EFAULT;
- error = bio_read_page(offset, buf);
+ error = bio_read_page(offset, buf, bio_chain);
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
+ error = wait_on_bio_chain(bio_chain);
handle->k = 0;
offset = handle->cur->next_swap;
if (!offset)
release_swap_reader(handle);
- else
- error = bio_read_page(offset, handle->cur);
+ else if (!error)
+ error = bio_read_page(offset, handle->cur, NULL);
}
return error;
}
@@ -434,33 +508,49 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
static int load_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
- unsigned int nr_pages)
+ unsigned int nr_to_read)
{
unsigned int m;
- int ret;
int error = 0;
+ struct timeval start;
+ struct timeval stop;
+ struct bio *bio;
+ int err2;
+ unsigned nr_pages;
- printk("Loading image data pages (%u pages) ... ", nr_pages);
- m = nr_pages / 100;
+ printk("Loading image data pages (%u pages) ... ", nr_to_read);
+ m = nr_to_read / 100;
if (!m)
m = 1;
nr_pages = 0;
- do {
- ret = snapshot_write_next(snapshot, PAGE_SIZE);
- if (ret > 0) {
- error = swap_read_page(handle, data_of(*snapshot));
- if (error)
- break;
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
- }
- } while (ret > 0);
+ bio = NULL;
+ do_gettimeofday(&start);
+ for ( ; ; ) {
+ error = snapshot_write_next(snapshot, PAGE_SIZE);
+ if (error <= 0)
+ break;
+ error = swap_read_page(handle, data_of(*snapshot), &bio);
+ if (error)
+ break;
+ if (snapshot->sync_read)
+ error = wait_on_bio_chain(&bio);
+ if (error)
+ break;
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+ }
+ err2 = wait_on_bio_chain(&bio);
+ do_gettimeofday(&stop);
+ if (!error)
+ error = err2;
if (!error) {
printk("\b\b\b\bdone\n");
+ snapshot_free_unused_memory(snapshot);
if (!snapshot_image_loaded(snapshot))
error = -ENODATA;
}
+ show_speed(&start, &stop, nr_to_read, "Read");
return error;
}
@@ -483,7 +573,7 @@ int swsusp_read(void)
header = (struct swsusp_info *)data_of(snapshot);
error = get_swap_reader(&handle, swsusp_header.image);
if (!error)
- error = swap_read_page(&handle, header);
+ error = swap_read_page(&handle, header, NULL);
if (!error)
error = load_image(&handle, &snapshot, header->pages - 1);
release_swap_reader(&handle);
@@ -509,7 +599,7 @@ int swsusp_check(void)
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
memset(&swsusp_header, 0, sizeof(swsusp_header));
- if ((error = bio_read_page(0, &swsusp_header)))
+ if ((error = bio_read_page(0, &swsusp_header, NULL)))
return error;
if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 17f669c83012..8ef677ea0cea 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -193,14 +193,13 @@ int swsusp_shrink_memory(void)
printk("Shrinking memory... ");
do {
size = 2 * count_highmem_pages();
- size += size / 50 + count_data_pages();
- size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
- PAGES_FOR_IO;
+ size += size / 50 + count_data_pages() + PAGES_FOR_IO;
tmp = size;
for_each_zone (zone)
if (!is_highmem(zone) && populated_zone(zone)) {
tmp -= zone->free_pages;
tmp += zone->lowmem_reserve[ZONE_NORMAL];
+ tmp += snapshot_additional_pages(zone);
}
if (tmp > 0) {
tmp = __shrink_memory(tmp);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3f1539fbe48a..2e4499f3e4d9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -19,6 +19,7 @@
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/fs.h>
+#include <linux/cpu.h>
#include <asm/uaccess.h>
@@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
if (data->frozen)
break;
down(&pm_sem);
- disable_nonboot_cpus();
- if (freeze_processes()) {
- thaw_processes();
- enable_nonboot_cpus();
- error = -EBUSY;
+ error = disable_nonboot_cpus();
+ if (!error) {
+ error = freeze_processes();
+ if (error) {
+ thaw_processes();
+ error = -EBUSY;
+ }
}
+ enable_nonboot_cpus();
up(&pm_sem);
if (!error)
data->frozen = 1;
@@ -189,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -EPERM;
break;
}
+ snapshot_free_unused_memory(&data->handle);
down(&pm_sem);
pm_prepare_console();
error = device_suspend(PMSG_FREEZE);
diff --git a/kernel/printk.c b/kernel/printk.c
index 1149365e989e..771f5e861bcd 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
return 0;
}
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
/**
* suspend_console - suspend the console subsystem
*
@@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
*/
void suspend_console(void)
{
+ printk("Suspending console(s)\n");
acquire_console_sem();
console_suspended = 1;
}
@@ -737,6 +739,7 @@ void resume_console(void)
console_suspended = 0;
release_console_sem();
}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
/**
* acquire_console_sem - lock the console system for exclusive use.
diff --git a/kernel/profile.c b/kernel/profile.c
index d5bd75e7501c..fb660c7d35ba 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -309,13 +309,17 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
node = cpu_to_node(cpu);
per_cpu(cpu_profile_flip, cpu) = 0;
if (!per_cpu(cpu_profile_hits, cpu)[1]) {
- page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ page = alloc_pages_node(node,
+ GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+ 0);
if (!page)
return NOTIFY_BAD;
per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
}
if (!per_cpu(cpu_profile_hits, cpu)[0]) {
- page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ page = alloc_pages_node(node,
+ GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+ 0);
if (!page)
goto out_free;
per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
@@ -491,12 +495,16 @@ static int __init create_hash_tables(void)
int node = cpu_to_node(cpu);
struct page *page;
- page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ page = alloc_pages_node(node,
+ GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+ 0);
if (!page)
goto out_cleanup;
per_cpu(cpu_profile_hits, cpu)[1]
= (struct profile_hit *)page_address(page);
- page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ page = alloc_pages_node(node,
+ GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+ 0);
if (!page)
goto out_cleanup;
per_cpu(cpu_profile_hits, cpu)[0]
diff --git a/kernel/sched.c b/kernel/sched.c
index a234fbee1238..5c848fd4e461 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -238,6 +238,7 @@ struct rq {
/* For active balancing */
int active_balance;
int push_cpu;
+ int cpu; /* cpu of this runqueue */
struct task_struct *migration_thread;
struct list_head migration_queue;
@@ -267,6 +268,15 @@ struct rq {
static DEFINE_PER_CPU(struct rq, runqueues);
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+ return rq->cpu;
+#else
+ return 0;
+#endif
+}
+
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
@@ -2211,7 +2221,8 @@ out:
*/
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
- unsigned long *imbalance, enum idle_type idle, int *sd_idle)
+ unsigned long *imbalance, enum idle_type idle, int *sd_idle,
+ cpumask_t *cpus)
{
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
sum_weighted_load = sum_nr_running = avg_load = 0;
for_each_cpu_mask(i, group->cpumask) {
- struct rq *rq = cpu_rq(i);
+ struct rq *rq;
+
+ if (!cpu_isset(i, *cpus))
+ continue;
+
+ rq = cpu_rq(i);
if (*sd_idle && !idle_cpu(i))
*sd_idle = 0;
@@ -2466,13 +2482,17 @@ ret:
*/
static struct rq *
find_busiest_queue(struct sched_group *group, enum idle_type idle,
- unsigned long imbalance)
+ unsigned long imbalance, cpumask_t *cpus)
{
struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
int i;
for_each_cpu_mask(i, group->cpumask) {
+
+ if (!cpu_isset(i, *cpus))
+ continue;
+
rq = cpu_rq(i);
if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
@@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
+ cpumask_t cpus = CPU_MASK_ALL;
if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
!sched_smt_power_savings)
@@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq,
schedstat_inc(sd, lb_cnt[idle]);
- group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
+redo:
+ group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+ &cpus);
if (!group) {
schedstat_inc(sd, lb_nobusyg[idle]);
goto out_balanced;
}
- busiest = find_busiest_queue(group, idle, imbalance);
+ busiest = find_busiest_queue(group, idle, imbalance, &cpus);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[idle]);
goto out_balanced;
@@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq,
double_rq_unlock(this_rq, busiest);
/* All tasks on this runqueue were pinned by CPU affinity */
- if (unlikely(all_pinned))
+ if (unlikely(all_pinned)) {
+ cpu_clear(cpu_of(busiest), cpus);
+ if (!cpus_empty(cpus))
+ goto redo;
goto out_balanced;
+ }
}
if (!nr_moved) {
@@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
unsigned long imbalance;
int nr_moved = 0;
int sd_idle = 0;
+ cpumask_t cpus = CPU_MASK_ALL;
if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
sd_idle = 1;
schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
- group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
+redo:
+ group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+ &sd_idle, &cpus);
if (!group) {
schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
goto out_balanced;
}
- busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
+ busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+ &cpus);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
goto out_balanced;
@@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
minus_1_or_zero(busiest->nr_running),
imbalance, sd, NEWLY_IDLE, NULL);
spin_unlock(&busiest->lock);
+
+ if (!nr_moved) {
+ cpu_clear(cpu_of(busiest), cpus);
+ if (!cpus_empty(cpus))
+ goto redo;
+ }
}
if (!nr_moved) {
@@ -6747,6 +6784,7 @@ void __init sched_init(void)
rq->cpu_load[j] = 0;
rq->active_balance = 0;
rq->push_cpu = 0;
+ rq->cpu = i;
rq->migration_thread = NULL;
INIT_LIST_HEAD(&rq->migration_queue);
#endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 362a0cc37138..fd43c3e6786b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -943,6 +943,17 @@ static ctl_table vm_table[] = {
.extra1 = &zero,
.extra2 = &one_hundred,
},
+ {
+ .ctl_name = VM_MIN_SLAB,
+ .procname = "min_slab_ratio",
+ .data = &sysctl_min_slab_ratio,
+ .maxlen = sizeof(sysctl_min_slab_ratio),
+ .mode = 0644,
+ .proc_handler = &sysctl_min_slab_ratio_sysctl_handler,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
#endif
#ifdef CONFIG_X86_32
{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 554ee688a9f8..3f21cc79a134 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -277,7 +277,7 @@ config DEBUG_HIGHMEM
config DEBUG_BUGVERBOSE
bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
depends on BUG
- depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
+ depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
default !EMBEDDED
help
Say Y here to make BUG() panics output the file name and line number
@@ -315,7 +315,7 @@ config DEBUG_VM
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
- depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390)
+ depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32)
default y if DEBUG_INFO && UML
help
If you say Y here the resulting kernel image will be slightly larger
diff --git a/mm/Makefile b/mm/Makefile
index 9dd824c11eeb..60c56c0b5e10 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
-
+obj-$(CONFIG_SMP) += allocpercpu.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
new file mode 100644
index 000000000000..eaa9abeea536
--- /dev/null
+++ b/mm/allocpercpu.c
@@ -0,0 +1,129 @@
+/*
+ * linux/mm/allocpercpu.c
+ *
+ * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+
+/**
+ * percpu_depopulate - depopulate per-cpu data for given cpu
+ * @__pdata: per-cpu data to depopulate
+ * @cpu: depopulate per-cpu data for this cpu
+ *
+ * Depopulating per-cpu data for a cpu going offline would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ */
+void percpu_depopulate(void *__pdata, int cpu)
+{
+ struct percpu_data *pdata = __percpu_disguise(__pdata);
+ if (pdata->ptrs[cpu]) {
+ kfree(pdata->ptrs[cpu]);
+ pdata->ptrs[cpu] = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(percpu_depopulate);
+
+/**
+ * __percpu_depopulate_mask - depopulate per-cpu data for some cpu's
+ * @__pdata: per-cpu data to depopulate
+ * @mask: depopulate per-cpu data for cpu's selected through mask bits
+ */
+void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+ int cpu;
+ for_each_cpu_mask(cpu, *mask)
+ percpu_depopulate(__pdata, cpu);
+}
+EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
+
+/**
+ * percpu_populate - populate per-cpu data for given cpu
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @cpu: populate per-cpu data for this cpu
+ *
+ * Populating per-cpu data for a cpu coming online would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ * Per-cpu object is populated with zeroed buffer.
+ */
+void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+{
+ struct percpu_data *pdata = __percpu_disguise(__pdata);
+ int node = cpu_to_node(cpu);
+
+ BUG_ON(pdata->ptrs[cpu]);
+ if (node_online(node)) {
+ /* FIXME: kzalloc_node(size, gfp, node) */
+ pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
+ if (pdata->ptrs[cpu])
+ memset(pdata->ptrs[cpu], 0, size);
+ } else
+ pdata->ptrs[cpu] = kzalloc(size, gfp);
+ return pdata->ptrs[cpu];
+}
+EXPORT_SYMBOL_GPL(percpu_populate);
+
+/**
+ * __percpu_populate_mask - populate per-cpu data for more cpu's
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
+{
+ cpumask_t populated = CPU_MASK_NONE;
+ int cpu;
+
+ for_each_cpu_mask(cpu, *mask)
+ if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
+ __percpu_depopulate_mask(__pdata, &populated);
+ return -ENOMEM;
+ } else
+ cpu_set(cpu, populated);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+/**
+ * __percpu_alloc_mask - initial setup of per-cpu data
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Populating per-cpu data for all online cpu's would be a typical use case,
+ * which is simplified by the percpu_alloc() wrapper.
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+ void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+ void *__pdata = __percpu_disguise(pdata);
+
+ if (unlikely(!pdata))
+ return NULL;
+ if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
+ return __pdata;
+ kfree(pdata);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
+
+/**
+ * percpu_free - final cleanup of per-cpu data
+ * @__pdata: object to clean up
+ *
+ * We simply clean up any per-cpu object left. No need for the client to
+ * track and specify through a bit mask which per-cpu objects are to be freed.
+ */
+void percpu_free(void *__pdata)
+{
+ __percpu_depopulate_mask(__pdata, &cpu_possible_map);
+ kfree(__percpu_disguise(__pdata));
+}
+EXPORT_SYMBOL_GPL(percpu_free);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 50353e0dac12..d53112fcb404 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -8,17 +8,15 @@
* free memory collector. It's used to deal with reserved
* system memory and memory holes as well.
*/
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/swap.h>
-#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/pfn.h>
#include <linux/bootmem.h>
-#include <linux/mmzone.h>
#include <linux/module.h>
-#include <asm/dma.h>
+
+#include <asm/bug.h>
#include <asm/io.h>
+#include <asm/processor.h>
+
#include "internal.h"
/*
@@ -41,7 +39,7 @@ unsigned long saved_max_pfn;
#endif
/* return the number of _pages_ that will be allocated for the boot bitmap */
-unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
unsigned long mapsize;
@@ -51,12 +49,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
return mapsize;
}
+
/*
* link bdata in order
*/
-static void link_bootmem(bootmem_data_t *bdata)
+static void __init link_bootmem(bootmem_data_t *bdata)
{
bootmem_data_t *ent;
+
if (list_empty(&bdata_list)) {
list_add(&bdata->list, &bdata_list);
return;
@@ -69,22 +69,32 @@ static void link_bootmem(bootmem_data_t *bdata)
}
}
list_add_tail(&bdata->list, &bdata_list);
- return;
}
+/*
+ * Given an initialised bdata, it returns the size of the boot bitmap in bytes
+ */
+static unsigned long __init get_mapsize(bootmem_data_t *bdata)
+{
+ unsigned long mapsize;
+ unsigned long start = PFN_DOWN(bdata->node_boot_start);
+ unsigned long end = bdata->node_low_pfn;
+
+ mapsize = ((end - start) + 7) / 8;
+ return ALIGN(mapsize, sizeof(long));
+}
/*
* Called once to set up the allocator itself.
*/
-static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
+static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
unsigned long mapstart, unsigned long start, unsigned long end)
{
bootmem_data_t *bdata = pgdat->bdata;
- unsigned long mapsize = ((end - start)+7)/8;
+ unsigned long mapsize;
- mapsize = ALIGN(mapsize, sizeof(long));
- bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
- bdata->node_boot_start = (start << PAGE_SHIFT);
+ bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
+ bdata->node_boot_start = PFN_PHYS(start);
bdata->node_low_pfn = end;
link_bootmem(bdata);
@@ -92,6 +102,7 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
* Initially all pages are reserved - setup_arch() has to
* register free RAM areas explicitly.
*/
+ mapsize = get_mapsize(bdata);
memset(bdata->node_bootmem_map, 0xff, mapsize);
return mapsize;
@@ -102,22 +113,22 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
-static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
+static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+ unsigned long size)
{
+ unsigned long sidx, eidx;
unsigned long i;
+
/*
* round up, partially reserved pages are considered
* fully reserved.
*/
- unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE;
- unsigned long eidx = (addr + size - bdata->node_boot_start +
- PAGE_SIZE-1)/PAGE_SIZE;
- unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;
-
BUG_ON(!size);
- BUG_ON(sidx >= eidx);
- BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn);
- BUG_ON(end > bdata->node_low_pfn);
+ BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
+ BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
+
+ sidx = PFN_DOWN(addr - bdata->node_boot_start);
+ eidx = PFN_UP(addr + size - bdata->node_boot_start);
for (i = sidx; i < eidx; i++)
if (test_and_set_bit(i, bdata->node_bootmem_map)) {
@@ -127,20 +138,18 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add
}
}
-static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
+static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+ unsigned long size)
{
+ unsigned long sidx, eidx;
unsigned long i;
- unsigned long start;
+
/*
* round down end of usable mem, partially free pages are
* considered reserved.
*/
- unsigned long sidx;
- unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
- unsigned long end = (addr + size)/PAGE_SIZE;
-
BUG_ON(!size);
- BUG_ON(end > bdata->node_low_pfn);
+ BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn);
if (addr < bdata->last_success)
bdata->last_success = addr;
@@ -148,8 +157,8 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
/*
* Round up the beginning of the address.
*/
- start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
- sidx = start - (bdata->node_boot_start/PAGE_SIZE);
+ sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
+ eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
for (i = sidx; i < eidx; i++) {
if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
@@ -175,10 +184,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{
unsigned long offset, remaining_size, areasize, preferred;
- unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
+ unsigned long i, start = 0, incr, eidx, end_pfn;
void *ret;
- if(!size) {
+ if (!size) {
printk("__alloc_bootmem_core(): zero-sized request\n");
BUG();
}
@@ -187,23 +196,22 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
if (limit && bdata->node_boot_start >= limit)
return NULL;
- limit >>=PAGE_SHIFT;
+ end_pfn = bdata->node_low_pfn;
+ limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
end_pfn = limit;
- eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
offset = 0;
- if (align &&
- (bdata->node_boot_start & (align - 1UL)) != 0)
- offset = (align - (bdata->node_boot_start & (align - 1UL)));
- offset >>= PAGE_SHIFT;
+ if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
+ offset = align - (bdata->node_boot_start & (align - 1UL));
+ offset = PFN_DOWN(offset);
/*
* We try to allocate bootmem pages above 'goal'
* first, then we try to allocate lower pages.
*/
- if (goal && (goal >= bdata->node_boot_start) &&
- ((goal >> PAGE_SHIFT) < end_pfn)) {
+ if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
preferred = goal - bdata->node_boot_start;
if (bdata->last_success >= preferred)
@@ -212,9 +220,8 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
} else
preferred = 0;
- preferred = ALIGN(preferred, align) >> PAGE_SHIFT;
- preferred += offset;
- areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+ preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+ areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
restart_scan:
@@ -229,7 +236,7 @@ restart_scan:
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
- if (test_bit (j, bdata->node_bootmem_map))
+ if (test_bit(j, bdata->node_bootmem_map))
goto fail_block;
}
start = i;
@@ -245,7 +252,7 @@ restart_scan:
return NULL;
found:
- bdata->last_success = start << PAGE_SHIFT;
+ bdata->last_success = PFN_PHYS(start);
BUG_ON(start >= eidx);
/*
@@ -257,19 +264,21 @@ found:
bdata->last_offset && bdata->last_pos+1 == start) {
offset = ALIGN(bdata->last_offset, align);
BUG_ON(offset > PAGE_SIZE);
- remaining_size = PAGE_SIZE-offset;
+ remaining_size = PAGE_SIZE - offset;
if (size < remaining_size) {
areasize = 0;
/* last_pos unchanged */
- bdata->last_offset = offset+size;
- ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
- bdata->node_boot_start);
+ bdata->last_offset = offset + size;
+ ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
+ offset +
+ bdata->node_boot_start);
} else {
remaining_size = size - remaining_size;
- areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
- ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
- bdata->node_boot_start);
- bdata->last_pos = start+areasize-1;
+ areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
+ ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
+ offset +
+ bdata->node_boot_start);
+ bdata->last_pos = start + areasize - 1;
bdata->last_offset = remaining_size;
}
bdata->last_offset &= ~PAGE_MASK;
@@ -282,7 +291,7 @@ found:
/*
* Reserve the area now:
*/
- for (i = start; i < start+areasize; i++)
+ for (i = start; i < start + areasize; i++)
if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
BUG();
memset(ret, 0, size);
@@ -303,8 +312,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
count = 0;
/* first extant page of the node */
- pfn = bdata->node_boot_start >> PAGE_SHIFT;
- idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ pfn = PFN_DOWN(bdata->node_boot_start);
+ idx = bdata->node_low_pfn - pfn;
map = bdata->node_bootmem_map;
/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
if (bdata->node_boot_start == 0 ||
@@ -333,7 +342,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
}
}
} else {
- i+=BITS_PER_LONG;
+ i += BITS_PER_LONG;
}
pfn += BITS_PER_LONG;
}
@@ -345,9 +354,10 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
*/
page = virt_to_page(bdata->node_bootmem_map);
count = 0;
- for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
- count++;
+ idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
+ for (i = 0; i < idx; i++, page++) {
__free_pages_bootmem(page, 0);
+ count++;
}
total += count;
bdata->node_bootmem_map = NULL;
@@ -355,64 +365,72 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
return total;
}
-unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn)
+unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
+ unsigned long startpfn, unsigned long endpfn)
{
- return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
+ return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
}
-void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+ unsigned long size)
{
reserve_bootmem_core(pgdat->bdata, physaddr, size);
}
-void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+ unsigned long size)
{
free_bootmem_core(pgdat->bdata, physaddr, size);
}
-unsigned long __init free_all_bootmem_node (pg_data_t *pgdat)
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
- return(free_all_bootmem_core(pgdat));
+ return free_all_bootmem_core(pgdat);
}
-unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
+unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
{
max_low_pfn = pages;
min_low_pfn = start;
- return(init_bootmem_core(NODE_DATA(0), start, 0, pages));
+ return init_bootmem_core(NODE_DATA(0), start, 0, pages);
}
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-void __init reserve_bootmem (unsigned long addr, unsigned long size)
+void __init reserve_bootmem(unsigned long addr, unsigned long size)
{
reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-void __init free_bootmem (unsigned long addr, unsigned long size)
+void __init free_bootmem(unsigned long addr, unsigned long size)
{
free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
}
-unsigned long __init free_all_bootmem (void)
+unsigned long __init free_all_bootmem(void)
{
- return(free_all_bootmem_core(NODE_DATA(0)));
+ return free_all_bootmem_core(NODE_DATA(0));
}
-void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
+ unsigned long goal)
{
bootmem_data_t *bdata;
void *ptr;
- list_for_each_entry(bdata, &bdata_list, list)
- if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
- return(ptr);
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
+ if (ptr)
+ return ptr;
+ }
return NULL;
}
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem(unsigned long size, unsigned long align,
+ unsigned long goal)
{
void *mem = __alloc_bootmem_nopanic(size,align,goal);
+
if (mem)
return mem;
/*
@@ -424,29 +442,34 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
}
-void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align,
- unsigned long goal)
+void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
{
void *ptr;
ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
if (ptr)
- return (ptr);
+ return ptr;
return __alloc_bootmem(size, align, goal);
}
-#define LOW32LIMIT 0xffffffff
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
+#endif
-void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
+ unsigned long goal)
{
bootmem_data_t *bdata;
void *ptr;
- list_for_each_entry(bdata, &bdata_list, list)
- if ((ptr = __alloc_bootmem_core(bdata, size,
- align, goal, LOW32LIMIT)))
- return(ptr);
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ptr = __alloc_bootmem_core(bdata, size, align, goal,
+ ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ }
/*
* Whoops, we cannot satisfy the allocation request.
@@ -459,5 +482,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsig
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
- return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT);
+ return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
+ ARCH_LOW_ADDRESS_LIMIT);
}
diff --git a/mm/filemap.c b/mm/filemap.c
index b9a60c43b61a..afcdc72b5e90 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x)
EXPORT_SYMBOL(page_cache_alloc_cold);
#endif
+static int __sleep_on_page_lock(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
/*
* In order to wait for pages to become available there must be
* waitqueues associated with pages. By using a hash table of
@@ -577,13 +583,24 @@ void fastcall __lock_page(struct page *page)
}
EXPORT_SYMBOL(__lock_page);
+/*
+ * Variant of lock_page that does not require the caller to hold a reference
+ * on the page's mapping.
+ */
+void fastcall __lock_page_nosync(struct page *page)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+ __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
+ TASK_UNINTERRUPTIBLE);
+}
+
/**
* find_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
*
- * A rather lightweight function, finding and getting a reference to a
- * hashed page atomically.
+ * Is there a pagecache struct page at the given (mapping, offset) tuple?
+ * If yes, increment its refcount and return it; if no, return NULL.
*/
struct page * find_get_page(struct address_space *mapping, unsigned long offset)
{
@@ -970,7 +987,7 @@ page_not_up_to_date:
/* Get exclusive access to the page ... */
lock_page(page);
- /* Did it get unhashed before we got the lock? */
+ /* Did it get truncated before we got the lock? */
if (!page->mapping) {
unlock_page(page);
page_cache_release(page);
@@ -1610,7 +1627,7 @@ no_cached_page:
page_not_uptodate:
lock_page(page);
- /* Did it get unhashed while we waited for it? */
+ /* Did it get truncated while we waited for it? */
if (!page->mapping) {
unlock_page(page);
goto err;
diff --git a/mm/fremap.c b/mm/fremap.c
index 21b7d0cbc98c..aa30618ec6b2 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -79,9 +79,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter(mm, file_rss);
flush_icache_page(vma, page);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ pte_val = mk_pte(page, prot);
+ set_pte_at(mm, addr, pte, pte_val);
page_add_file_rmap(page);
- pte_val = *pte;
update_mmu_cache(vma, addr, pte_val);
lazy_mmu_prot_update(pte_val);
err = 0;
diff --git a/mm/highmem.c b/mm/highmem.c
index 9b2a5403c447..ee5519b176ee 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
*/
#ifdef CONFIG_HIGHMEM
+unsigned long totalhigh_pages __read_mostly;
+
+unsigned int nr_free_highpages (void)
+{
+ pg_data_t *pgdat;
+ unsigned int pages = 0;
+
+ for_each_online_pgdat(pgdat)
+ pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+
+ return pages;
+}
+
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index df499973255f..7c7d03dbf73d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
struct zone **z;
for (z = zonelist->zones; *z; z++) {
- nid = (*z)->zone_pgdat->node_id;
+ nid = zone_to_nid(*z);
if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
!list_empty(&hugepage_freelists[nid]))
break;
@@ -177,7 +177,7 @@ static void update_and_free_page(struct page *page)
{
int i;
nr_huge_pages--;
- nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
+ nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
@@ -191,7 +191,8 @@ static void update_and_free_page(struct page *page)
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(unsigned long count)
{
- int i, nid;
+ int i;
+
for (i = 0; i < MAX_NUMNODES; ++i) {
struct page *page, *next;
list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
@@ -199,9 +200,8 @@ static void try_to_free_low(unsigned long count)
continue;
list_del(&page->lru);
update_and_free_page(page);
- nid = page_zone(page)->zone_pgdat->node_id;
free_huge_pages--;
- free_huge_pages_node[nid]--;
+ free_huge_pages_node[page_to_nid(page)]--;
if (count >= nr_huge_pages)
return;
}
diff --git a/mm/internal.h b/mm/internal.h
index d20e3cc4aef0..d527b80b292f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v)
*/
static inline void set_page_refcounted(struct page *page)
{
- BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
- BUG_ON(atomic_read(&page->_count));
+ VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
+ VM_BUG_ON(atomic_read(&page->_count));
set_page_count(page, 1);
}
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..92a3ebd8d795 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
#include <linux/module.h>
#include <linux/delayacct.h>
#include <linux/init.h>
+#include <linux/writeback.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -1226,7 +1227,12 @@ out:
return retval;
}
-/*
+/**
+ * vm_insert_page - insert single page into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @page: source kernel page
+ *
* This allows drivers to insert individual pages they've allocated
* into a user vma.
*
@@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
-/* Note: this is only safe if the mm semaphore is held when called. */
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: page frame number of the kernel physical memory to map
+ * @size: size of map area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
@@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct page *old_page, *new_page;
pte_t entry;
- int reuse, ret = VM_FAULT_MINOR;
+ int reuse = 0, ret = VM_FAULT_MINOR;
+ struct page *dirty_page = NULL;
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page)
goto gotten;
- if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
- (VM_SHARED|VM_WRITE))) {
+ /*
+ * Take out anonymous pages first, anonymous shared vmas are
+ * not dirty accountable.
+ */
+ if (PageAnon(old_page)) {
+ if (!TestSetPageLocked(old_page)) {
+ reuse = can_share_swap_page(old_page);
+ unlock_page(old_page);
+ }
+ } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+ (VM_WRITE|VM_SHARED))) {
+ /*
+ * Only catch write-faults on shared writable pages,
+ * read-only shared pages can get COWed by
+ * get_user_pages(.write=1, .force=1).
+ */
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
/*
* Notify the address space that the page is about to
@@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_same(*page_table, orig_pte))
goto unlock;
}
-
+ dirty_page = old_page;
+ get_page(dirty_page);
reuse = 1;
- } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
- reuse = can_share_swap_page(old_page);
- unlock_page(old_page);
- } else {
- reuse = 0;
}
if (reuse) {
@@ -1566,6 +1592,10 @@ gotten:
page_cache_release(old_page);
unlock:
pte_unmap_unlock(page_table, ptl);
+ if (dirty_page) {
+ set_page_dirty_balance(dirty_page);
+ put_page(dirty_page);
+ }
return ret;
oom:
if (old_page)
@@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
@@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
}
EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */
-/*
+/**
+ * swapin_readahead - swap in pages in hope we need them soon
+ * @entry: swap entry of this memory
+ * @addr: address to start
+ * @vma: user vma this address belongs to
+ *
* Primitive swap readahead code. We simply read an aligned block of
* (1 << page_cluster) entries in the swap area. This method is chosen
* because it doesn't cost us any seek time. We also make sure to queue
- * the 'original' request together with the readahead ones...
+ * the 'original' request together with the readahead ones...
*
* This has been extended to use the NUMA policies from the mm triggering
* the readahead.
@@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned int sequence = 0;
int ret = VM_FAULT_MINOR;
int anon = 0;
+ struct page *dirty_page = NULL;
pte_unmap(page_table);
BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2229,10 @@ retry:
} else {
inc_mm_counter(mm, file_rss);
page_add_file_rmap(new_page);
+ if (write_access) {
+ dirty_page = new_page;
+ get_page(dirty_page);
+ }
}
} else {
/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2245,10 @@ retry:
lazy_mmu_prot_update(entry);
unlock:
pte_unmap_unlock(page_table, ptl);
+ if (dirty_page) {
+ set_page_dirty_balance(dirty_page);
+ put_page(dirty_page);
+ }
return ret;
oom:
page_cache_release(new_page);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a9963ceddd65..38f89650bc84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
/* Highest zone. An specific allocation for a zone below that is not
policied. */
-int policy_zone = ZONE_DMA;
+enum zone_type policy_zone = ZONE_DMA;
struct mempolicy default_policy = {
.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
static struct zonelist *bind_zonelist(nodemask_t *nodes)
{
struct zonelist *zl;
- int num, max, nd, k;
+ int num, max, nd;
+ enum zone_type k;
max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
lower zones etc. Avoid empty zones because the memory allocator
doesn't like them. If you implement node hot removal you
have to fix that. */
- for (k = policy_zone; k >= 0; k--) {
+ k = policy_zone;
+ while (1) {
for_each_node_mask(nd, *nodes) {
struct zone *z = &NODE_DATA(nd)->node_zones[k];
if (z->present_pages > 0)
zl->zones[num++] = z;
}
+ if (k == 0)
+ break;
+ k--;
}
zl->zones[num] = NULL;
return zl;
@@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
switch (p->policy) {
case MPOL_BIND:
for (i = 0; p->v.zonelist->zones[i]; i++)
- node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
+ node_set(zone_to_nid(p->v.zonelist->zones[i]),
*nodes);
break;
case MPOL_DEFAULT:
@@ -1140,7 +1145,7 @@ unsigned slab_node(struct mempolicy *policy)
* Follow bind policy behavior and start allocation at the
* first node.
*/
- return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+ return zone_to_nid(policy->v.zonelist->zones[0]);
case MPOL_PREFERRED:
if (policy->v.preferred_node >= 0)
@@ -1285,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
if ((gfp & __GFP_WAIT) && !in_interrupt())
cpuset_update_task_memory_state();
- if (!pol || in_interrupt())
+ if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
pol = &default_policy;
if (pol->policy == MPOL_INTERLEAVE)
return alloc_page_interleave(gfp, order, interleave_nodes(pol));
@@ -1644,7 +1649,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
nodes_clear(nodes);
for (z = pol->v.zonelist->zones; *z; z++)
- node_set((*z)->zone_pgdat->node_id, nodes);
+ node_set(zone_to_nid(*z), nodes);
nodes_remap(tmp, nodes, *mpolmask, *newmask);
nodes = tmp;
diff --git a/mm/migrate.c b/mm/migrate.c
index 3f1e0c2c942c..20a8c2687b1e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -741,7 +741,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
*result = &pm->status;
- return alloc_pages_node(pm->node, GFP_HIGHUSER, 0);
+ return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
}
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index d799d896d74a..eea8eefd51a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -116,7 +116,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* which are reclaimable, under pressure. The dentry
* cache and most inode caches should fall into this
*/
- free += atomic_read(&slab_reclaim_pages);
+ free += global_page_state(NR_SLAB_RECLAIMABLE);
/*
* Leave the last 3% for root
@@ -1105,12 +1105,6 @@ munmap_back:
goto free_vma;
}
- /* Don't make the VMA automatically writable if it's shared, but the
- * backer wishes to know when pages are first written to */
- if (vma->vm_ops && vma->vm_ops->page_mkwrite)
- vma->vm_page_prot =
- protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
-
/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
* shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
* that memory reservation must be checked; but that reservation
@@ -1128,6 +1122,10 @@ munmap_back:
pgoff = vma->vm_pgoff;
vm_flags = vma->vm_flags;
+ if (vma_wants_writenotify(vma))
+ vma->vm_page_prot =
+ protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+
if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
file = vma->vm_file;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 638edabaff71..955f9d0e38aa 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -27,7 +27,8 @@
#include <asm/tlbflush.h>
static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t newprot)
+ unsigned long addr, unsigned long end, pgprot_t newprot,
+ int dirty_accountable)
{
pte_t *pte, oldpte;
spinlock_t *ptl;
@@ -42,7 +43,14 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
* bits by wiping the pte and then setting the new pte
* into place.
*/
- ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot);
+ ptent = ptep_get_and_clear(mm, addr, pte);
+ ptent = pte_modify(ptent, newprot);
+ /*
+ * Avoid taking write faults for pages we know to be
+ * dirty.
+ */
+ if (dirty_accountable && pte_dirty(ptent))
+ ptent = pte_mkwrite(ptent);
set_pte_at(mm, addr, pte, ptent);
lazy_mmu_prot_update(ptent);
#ifdef CONFIG_MIGRATION
@@ -66,7 +74,8 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
}
static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end, pgprot_t newprot)
+ unsigned long addr, unsigned long end, pgprot_t newprot,
+ int dirty_accountable)
{
pmd_t *pmd;
unsigned long next;
@@ -76,12 +85,13 @@ static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
- change_pte_range(mm, pmd, addr, next, newprot);
+ change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
} while (pmd++, addr = next, addr != end);
}
static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end, pgprot_t newprot)
+ unsigned long addr, unsigned long end, pgprot_t newprot,
+ int dirty_accountable)
{
pud_t *pud;
unsigned long next;
@@ -91,12 +101,13 @@ static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- change_pmd_range(mm, pud, addr, next, newprot);
+ change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable);
} while (pud++, addr = next, addr != end);
}
static void change_protection(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end, pgprot_t newprot)
+ unsigned long addr, unsigned long end, pgprot_t newprot,
+ int dirty_accountable)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
@@ -110,7 +121,7 @@ static void change_protection(struct vm_area_struct *vma,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- change_pud_range(mm, pgd, addr, next, newprot);
+ change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable);
} while (pgd++, addr = next, addr != end);
flush_tlb_range(vma, start, end);
}
@@ -123,10 +134,9 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
unsigned long oldflags = vma->vm_flags;
long nrpages = (end - start) >> PAGE_SHIFT;
unsigned long charged = 0;
- unsigned int mask;
- pgprot_t newprot;
pgoff_t pgoff;
int error;
+ int dirty_accountable = 0;
if (newflags == oldflags) {
*pprev = vma;
@@ -176,24 +186,23 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
}
success:
- /* Don't make the VMA automatically writable if it's shared, but the
- * backer wishes to know when pages are first written to */
- mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED;
- if (vma->vm_ops && vma->vm_ops->page_mkwrite)
- mask &= ~VM_SHARED;
-
- newprot = protection_map[newflags & mask];
-
/*
* vm_flags and vm_page_prot are protected by the mmap_sem
* held in write mode.
*/
vma->vm_flags = newflags;
- vma->vm_page_prot = newprot;
+ vma->vm_page_prot = protection_map[newflags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ if (vma_wants_writenotify(vma)) {
+ vma->vm_page_prot = protection_map[newflags &
+ (VM_READ|VM_WRITE|VM_EXEC)];
+ dirty_accountable = 1;
+ }
+
if (is_vm_hugetlb_page(vma))
- hugetlb_change_protection(vma, start, end, newprot);
+ hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
else
- change_protection(vma, start, end, newprot);
+ change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
return 0;
diff --git a/mm/msync.c b/mm/msync.c
index d083544df21b..358d73cf7b78 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -7,149 +7,33 @@
/*
* The msync() system call.
*/
-#include <linux/slab.h>
-#include <linux/pagemap.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
-#include <linux/hugetlb.h>
-#include <linux/writeback.h>
#include <linux/file.h>
#include <linux/syscalls.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-
-static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end)
-{
- pte_t *pte;
- spinlock_t *ptl;
- int progress = 0;
- unsigned long ret = 0;
-
-again:
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- do {
- struct page *page;
-
- if (progress >= 64) {
- progress = 0;
- if (need_resched() || need_lockbreak(ptl))
- break;
- }
- progress++;
- if (!pte_present(*pte))
- continue;
- if (!pte_maybe_dirty(*pte))
- continue;
- page = vm_normal_page(vma, addr, *pte);
- if (!page)
- continue;
- if (ptep_clear_flush_dirty(vma, addr, pte) ||
- page_test_and_clear_dirty(page))
- ret += set_page_dirty(page);
- progress += 3;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
- if (addr != end)
- goto again;
- return ret;
-}
-
-static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
- pud_t *pud, unsigned long addr, unsigned long end)
-{
- pmd_t *pmd;
- unsigned long next;
- unsigned long ret = 0;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- ret += msync_pte_range(vma, pmd, addr, next);
- } while (pmd++, addr = next, addr != end);
- return ret;
-}
-
-static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
- pgd_t *pgd, unsigned long addr, unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long ret = 0;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- ret += msync_pmd_range(vma, pud, addr, next);
- } while (pud++, addr = next, addr != end);
- return ret;
-}
-
-static unsigned long msync_page_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
-{
- pgd_t *pgd;
- unsigned long next;
- unsigned long ret = 0;
-
- /* For hugepages we can't go walking the page table normally,
- * but that's ok, hugetlbfs is memory based, so we don't need
- * to do anything more on an msync().
- */
- if (vma->vm_flags & VM_HUGETLB)
- return 0;
-
- BUG_ON(addr >= end);
- pgd = pgd_offset(vma->vm_mm, addr);
- flush_cache_range(vma, addr, end);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- ret += msync_pud_range(vma, pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
- return ret;
-}
-
/*
* MS_SYNC syncs the entire file - including mappings.
*
- * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just
- * marks the relevant pages dirty. The application may now run fsync() to
+ * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
+ * Nor does it mark the relevant pages dirty (it used to, up to 2.6.17).
+ * Now it doesn't do anything, since dirty pages are properly tracked.
+ *
+ * The application may now run fsync() to
* write out the dirty pages and wait on the writeout and check the result.
* Or the application may run fadvise(FADV_DONTNEED) against the fd to start
* async writeout immediately.
* So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
* applications.
*/
-static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
- unsigned long end, int flags,
- unsigned long *nr_pages_dirtied)
-{
- struct file *file = vma->vm_file;
-
- if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
- return -EBUSY;
-
- if (file && (vma->vm_flags & VM_SHARED))
- *nr_pages_dirtied = msync_page_range(vma, addr, end);
- return 0;
-}
-
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
unsigned long end;
+ struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int unmapped_error = 0;
int error = -EINVAL;
- int done = 0;
if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
goto out;
@@ -169,64 +53,50 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
* If the interval [start,end) covers some unmapped address ranges,
* just ignore them, but return -ENOMEM at the end.
*/
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, start);
- if (!vma) {
- error = -ENOMEM;
- goto out_unlock;
- }
- do {
- unsigned long nr_pages_dirtied = 0;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ for (;;) {
struct file *file;
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ goto out_unlock;
/* Here start < vma->vm_end. */
if (start < vma->vm_start) {
- unmapped_error = -ENOMEM;
start = vma->vm_start;
+ if (start >= end)
+ goto out_unlock;
+ unmapped_error = -ENOMEM;
}
/* Here vma->vm_start <= start < vma->vm_end. */
- if (end <= vma->vm_end) {
- if (start < end) {
- error = msync_interval(vma, start, end, flags,
- &nr_pages_dirtied);
- if (error)
- goto out_unlock;
- }
- error = unmapped_error;
- done = 1;
- } else {
- /* Here vma->vm_start <= start < vma->vm_end < end. */
- error = msync_interval(vma, start, vma->vm_end, flags,
- &nr_pages_dirtied);
- if (error)
- goto out_unlock;
+ if ((flags & MS_INVALIDATE) &&
+ (vma->vm_flags & VM_LOCKED)) {
+ error = -EBUSY;
+ goto out_unlock;
}
file = vma->vm_file;
start = vma->vm_end;
- if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
- get_file(file);
- up_read(&current->mm->mmap_sem);
- balance_dirty_pages_ratelimited_nr(file->f_mapping,
- nr_pages_dirtied);
- fput(file);
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, start);
- } else if ((flags & MS_SYNC) && file &&
+ if ((flags & MS_SYNC) && file &&
(vma->vm_flags & VM_SHARED)) {
get_file(file);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
error = do_fsync(file, 0);
fput(file);
- down_read(&current->mm->mmap_sem);
- if (error)
- goto out_unlock;
- vma = find_vma(current->mm, start);
+ if (error || start >= end)
+ goto out;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
} else {
+ if (start >= end) {
+ error = 0;
+ goto out_unlock;
+ }
vma = vma->vm_next;
}
- } while (vma && !done);
+ }
out_unlock:
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
out:
- return error;
+ return error ? : unmapped_error;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index c576df71e3bb..d99dea31e443 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1133,7 +1133,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* which are reclaimable, under pressure. The dentry
* cache and most inode caches should fall into this
*/
- free += atomic_read(&slab_reclaim_pages);
+ free += global_page_state(NR_SLAB_RECLAIMABLE);
/*
* Leave the last 3% for root
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..bada3d03119f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -21,6 +21,8 @@
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
int sysctl_panic_on_oom;
/* #define DEBUG */
@@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
}
/*
+ * swapoff can easily use up all memory, so kill those first.
+ */
+ if (p->flags & PF_SWAPOFF)
+ return ULONG_MAX;
+
+ /*
* The memory size of the process is the basis for the badness.
*/
points = mm->total_vm;
@@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
points /= 4;
/*
+ * If p's nodes don't overlap ours, it may still help to kill p
+ * because p may have allocated or otherwise mapped memory on
+ * this node before. However it will be less likely.
+ */
+ if (!cpuset_excl_nodes_overlap(p))
+ points /= 8;
+
+ /*
* Adjust the score by oomkilladj.
*/
if (p->oomkilladj) {
@@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
for (z = zonelist->zones; *z; z++)
if (cpuset_zone_allowed(*z, gfp_mask))
- node_clear((*z)->zone_pgdat->node_id,
- nodes);
+ node_clear(zone_to_nid(*z), nodes);
else
return CONSTRAINT_CPUSET;
@@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
unsigned long points;
int releasing;
+ /* skip kernel threads */
+ if (!p->mm)
+ continue;
/* skip the init task with pid == 1 */
if (p->pid == 1)
continue;
- if (p->oomkilladj == OOM_DISABLE)
- continue;
- /* If p's nodes don't overlap ours, it won't help to kill p. */
- if (!cpuset_excl_nodes_overlap(p))
- continue;
/*
* This is in the process of releasing memory so wait for it
* to finish before killing some other task by mistake.
+ *
+ * However, if p is the current task, we allow the 'kill' to
+ * go ahead if it is exiting: this will simply set TIF_MEMDIE,
+ * which will allow it to gain access to memory reserves in
+ * the process of exiting and releasing its resources.
+ * Otherwise we could get an OOM deadlock.
*/
releasing = test_tsk_thread_flag(p, TIF_MEMDIE) ||
p->flags & PF_EXITING;
- if (releasing && !(p->flags & PF_DEAD))
+ if (releasing) {
+ /* PF_DEAD tasks have already released their mm */
+ if (p->flags & PF_DEAD)
+ continue;
+ if (p->flags & PF_EXITING && p == current) {
+ chosen = p;
+ *ppoints = ULONG_MAX;
+ break;
+ }
return ERR_PTR(-1UL);
- if (p->flags & PF_SWAPOFF)
- return p;
+ }
+ if (p->oomkilladj == OOM_DISABLE)
+ continue;
points = badness(p, uptime.tv_sec);
if (points > *ppoints || !chosen) {
@@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
}
/**
- * We must be careful though to never send SIGKILL a process with
- * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
- * we select a process with CAP_SYS_RAW_IO set).
+ * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO
+ * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO
+ * set.
*/
static void __oom_kill_task(struct task_struct *p, const char *message)
{
@@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message)
return;
}
task_unlock(p);
- printk(KERN_ERR "%s: Killed process %d (%s).\n",
+
+ if (message) {
+ printk(KERN_ERR "%s: Killed process %d (%s).\n",
message, p->pid, p->comm);
+ }
/*
* We give our sacrificial lamb high priority and access to
@@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
struct task_struct *c;
struct list_head *tsk;
- printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and "
- "children.\n", p->pid, p->comm, points);
+ /*
+ * If the task is already exiting, don't alarm the sysadmin or kill
+ * its children or threads, just set TIF_MEMDIE so it can die quickly
+ */
+ if (p->flags & PF_EXITING) {
+ __oom_kill_task(p, NULL);
+ return 0;
+ }
+
+ printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li"
+ " and children.\n", p->pid, p->comm, points);
/* Try to kill a child first */
list_for_each(tsk, &p->children) {
c = list_entry(tsk, struct task_struct, sibling);
@@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
return oom_kill_task(p, message);
}
+static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
+
+int register_oom_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&oom_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_oom_notifier);
+
+int unregister_oom_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&oom_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_oom_notifier);
+
/**
* out_of_memory - kill the "best" process when we run out of memory
*
@@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
{
struct task_struct *p;
unsigned long points = 0;
+ unsigned long freed = 0;
+
+ blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+ if (freed > 0)
+ /* Got some memory back in the last second. */
+ return;
if (printk_ratelimit()) {
- printk("oom-killer: gfp_mask=0x%x, order=%d\n",
- gfp_mask, order);
+ printk(KERN_WARNING "%s invoked oom-killer: "
+ "gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
+ current->comm, gfp_mask, order, current->oomkilladj);
dump_stack();
show_mem();
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 77a0bc4e261a..555752907dc3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -23,6 +23,7 @@
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
+#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
@@ -243,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping)
pdflush_operation(background_writeout, 0);
}
+void set_page_dirty_balance(struct page *page)
+{
+ if (set_page_dirty(page)) {
+ struct address_space *mapping = page_mapping(page);
+
+ if (mapping)
+ balance_dirty_pages_ratelimited(mapping);
+ }
+}
+
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
@@ -550,7 +561,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
return 0;
wbc->for_writepages = 1;
if (mapping->a_ops->writepages)
- ret = mapping->a_ops->writepages(mapping, wbc);
+ ret = mapping->a_ops->writepages(mapping, wbc);
else
ret = generic_writepages(mapping, wbc);
wbc->for_writepages = 0;
@@ -690,7 +701,7 @@ int set_page_dirty_lock(struct page *page)
{
int ret;
- lock_page(page);
+ lock_page_nosync(page);
ret = set_page_dirty(page);
unlock_page(page);
return ret;
@@ -712,9 +723,15 @@ int test_clear_page_dirty(struct page *page)
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
- if (mapping_cap_account_dirty(mapping))
- __dec_zone_page_state(page, NR_FILE_DIRTY);
write_unlock_irqrestore(&mapping->tree_lock, flags);
+ /*
+ * We can continue to use `mapping' here because the
+ * page is locked, which pins the address_space
+ */
+ if (mapping_cap_account_dirty(mapping)) {
+ page_mkclean(page);
+ dec_zone_page_state(page, NR_FILE_DIRTY);
+ }
return 1;
}
write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -744,8 +761,10 @@ int clear_page_dirty_for_io(struct page *page)
if (mapping) {
if (TestClearPageDirty(page)) {
- if (mapping_cap_account_dirty(mapping))
+ if (mapping_cap_account_dirty(mapping)) {
+ page_mkclean(page);
dec_zone_page_state(page, NR_FILE_DIRTY);
+ }
return 1;
}
return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3b5358a0561f..9810f0a60db7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
unsigned long totalram_pages __read_mostly;
-unsigned long totalhigh_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
@@ -69,7 +68,15 @@ static void __free_pages_ok(struct page *page, unsigned int order);
* TBD: should special case ZONE_DMA32 machines here - in those we normally
* don't need any ZONE_NORMAL reservation
*/
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+ 256,
+#ifdef CONFIG_ZONE_DMA32
+ 256,
+#endif
+#ifdef CONFIG_HIGHMEM
+ 32
+#endif
+};
EXPORT_SYMBOL(totalram_pages);
@@ -80,7 +87,17 @@ EXPORT_SYMBOL(totalram_pages);
struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
EXPORT_SYMBOL(zone_table);
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = {
+ "DMA",
+#ifdef CONFIG_ZONE_DMA32
+ "DMA32",
+#endif
+ "Normal",
+#ifdef CONFIG_HIGHMEM
+ "HighMem"
+#endif
+};
+
int min_free_kbytes = 1024;
unsigned long __meminitdata nr_kernel_pages;
@@ -127,7 +144,6 @@ static int bad_range(struct zone *zone, struct page *page)
return 0;
}
-
#else
static inline int bad_range(struct zone *zone, struct page *page)
{
@@ -218,12 +234,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
{
int i;
- BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+ VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
/*
* clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
* and __GFP_HIGHMEM from hard or soft interrupt context.
*/
- BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+ VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
for (i = 0; i < (1 << order); i++)
clear_highpage(page + i);
}
@@ -347,8 +363,8 @@ static inline void __free_one_page(struct page *page,
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
- BUG_ON(page_idx & (order_size - 1));
- BUG_ON(bad_range(zone, page));
+ VM_BUG_ON(page_idx & (order_size - 1));
+ VM_BUG_ON(bad_range(zone, page));
zone->free_pages += order_size;
while (order < MAX_ORDER-1) {
@@ -421,7 +437,7 @@ static void free_pages_bulk(struct zone *zone, int count,
while (count--) {
struct page *page;
- BUG_ON(list_empty(list));
+ VM_BUG_ON(list_empty(list));
page = list_entry(list->prev, struct page, lru);
/* have to delete it as __free_one_page list manipulates */
list_del(&page->lru);
@@ -432,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count,
static void free_one_page(struct zone *zone, struct page *page, int order)
{
- LIST_HEAD(list);
- list_add(&page->lru, &list);
- free_pages_bulk(zone, 1, &list, order);
+ spin_lock(&zone->lock);
+ zone->all_unreclaimable = 0;
+ zone->pages_scanned = 0;
+ __free_one_page(page, zone ,order);
+ spin_unlock(&zone->lock);
}
static void __free_pages_ok(struct page *page, unsigned int order)
@@ -512,7 +530,7 @@ static inline void expand(struct zone *zone, struct page *page,
area--;
high--;
size >>= 1;
- BUG_ON(bad_range(zone, &page[size]));
+ VM_BUG_ON(bad_range(zone, &page[size]));
list_add(&page[size].lru, &area->free_list);
area->nr_free++;
set_page_order(&page[size], high);
@@ -615,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
#ifdef CONFIG_NUMA
/*
* Called from the slab reaper to drain pagesets on a particular node that
- * belong to the currently executing processor.
+ * belongs to the currently executing processor.
* Note that this function must be called with the thread pinned to
* a single processor.
*/
void drain_node_pages(int nodeid)
{
- int i, z;
+ int i;
+ enum zone_type z;
unsigned long flags;
for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
struct per_cpu_pageset *pset;
+ if (!populated_zone(zone))
+ continue;
+
pset = zone_pcp(zone, smp_processor_id());
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
@@ -672,7 +694,8 @@ static void __drain_pages(unsigned int cpu)
void mark_free_pages(struct zone *zone)
{
- unsigned long zone_pfn, flags;
+ unsigned long pfn, max_zone_pfn;
+ unsigned long flags;
int order;
struct list_head *curr;
@@ -680,18 +703,25 @@ void mark_free_pages(struct zone *zone)
return;
spin_lock_irqsave(&zone->lock, flags);
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
- ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn));
+
+ max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (pfn_valid(pfn)) {
+ struct page *page = pfn_to_page(pfn);
+
+ if (!PageNosave(page))
+ ClearPageNosaveFree(page);
+ }
for (order = MAX_ORDER - 1; order >= 0; --order)
list_for_each(curr, &zone->free_area[order].free_list) {
- unsigned long start_pfn, i;
+ unsigned long i;
- start_pfn = page_to_pfn(list_entry(curr, struct page, lru));
+ pfn = page_to_pfn(list_entry(curr, struct page, lru));
+ for (i = 0; i < (1UL << order); i++)
+ SetPageNosaveFree(pfn_to_page(pfn + i));
+ }
- for (i=0; i < (1<<order); i++)
- SetPageNosaveFree(pfn_to_page(start_pfn+i));
- }
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -761,8 +791,8 @@ void split_page(struct page *page, unsigned int order)
{
int i;
- BUG_ON(PageCompound(page));
- BUG_ON(!page_count(page));
+ VM_BUG_ON(PageCompound(page));
+ VM_BUG_ON(!page_count(page));
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
}
@@ -809,7 +839,7 @@ again:
local_irq_restore(flags);
put_cpu();
- BUG_ON(bad_range(zone, page));
+ VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
goto again;
return page;
@@ -870,32 +900,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
struct zone **z = zonelist->zones;
struct page *page = NULL;
int classzone_idx = zone_idx(*z);
+ struct zone *zone;
/*
* Go through the zonelist once, looking for a zone with enough free.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
do {
+ zone = *z;
+ if (unlikely((gfp_mask & __GFP_THISNODE) &&
+ zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
+ break;
if ((alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed(*z, gfp_mask))
+ !cpuset_zone_allowed(zone, gfp_mask))
continue;
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
unsigned long mark;
if (alloc_flags & ALLOC_WMARK_MIN)
- mark = (*z)->pages_min;
+ mark = zone->pages_min;
else if (alloc_flags & ALLOC_WMARK_LOW)
- mark = (*z)->pages_low;
+ mark = zone->pages_low;
else
- mark = (*z)->pages_high;
- if (!zone_watermark_ok(*z, order, mark,
+ mark = zone->pages_high;
+ if (!zone_watermark_ok(zone , order, mark,
classzone_idx, alloc_flags))
if (!zone_reclaim_mode ||
- !zone_reclaim(*z, gfp_mask, order))
+ !zone_reclaim(zone, gfp_mask, order))
continue;
}
- page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
+ page = buffered_rmqueue(zonelist, zone, order, gfp_mask);
if (page) {
break;
}
@@ -1083,7 +1118,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
* get_zeroed_page() returns a 32-bit address, which cannot represent
* a highmem page
*/
- BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+ VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
if (page)
@@ -1116,7 +1151,7 @@ EXPORT_SYMBOL(__free_pages);
fastcall void free_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
- BUG_ON(!virt_addr_valid((void *)addr));
+ VM_BUG_ON(!virt_addr_valid((void *)addr));
__free_pages(virt_to_page((void *)addr), order);
}
}
@@ -1142,7 +1177,8 @@ EXPORT_SYMBOL(nr_free_pages);
#ifdef CONFIG_NUMA
unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
{
- unsigned int i, sum = 0;
+ unsigned int sum = 0;
+ enum zone_type i;
for (i = 0; i < MAX_NR_ZONES; i++)
sum += pgdat->node_zones[i].free_pages;
@@ -1186,24 +1222,10 @@ unsigned int nr_free_pagecache_pages(void)
{
return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
}
-
-#ifdef CONFIG_HIGHMEM
-unsigned int nr_free_highpages (void)
-{
- pg_data_t *pgdat;
- unsigned int pages = 0;
-
- for_each_online_pgdat(pgdat)
- pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
-
- return pages;
-}
-#endif
-
#ifdef CONFIG_NUMA
static void show_node(struct zone *zone)
{
- printk("Node %d ", zone->zone_pgdat->node_id);
+ printk("Node %ld ", zone_to_nid(zone));
}
#else
#define show_node(zone) do { } while (0)
@@ -1215,13 +1237,8 @@ void si_meminfo(struct sysinfo *val)
val->sharedram = 0;
val->freeram = nr_free_pages();
val->bufferram = nr_blockdev_pages();
-#ifdef CONFIG_HIGHMEM
val->totalhigh = totalhigh_pages;
val->freehigh = nr_free_highpages();
-#else
- val->totalhigh = 0;
- val->freehigh = 0;
-#endif
val->mem_unit = PAGE_SIZE;
}
@@ -1234,8 +1251,13 @@ void si_meminfo_node(struct sysinfo *val, int nid)
val->totalram = pgdat->node_present_pages;
val->freeram = nr_free_pages_pgdat(pgdat);
+#ifdef CONFIG_HIGHMEM
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+#else
+ val->totalhigh = 0;
+ val->freehigh = 0;
+#endif
val->mem_unit = PAGE_SIZE;
}
#endif
@@ -1282,10 +1304,6 @@ void show_free_areas(void)
get_zone_counts(&active, &inactive, &free);
- printk("Free pages: %11ukB (%ukB HighMem)\n",
- K(nr_free_pages()),
- K(nr_free_highpages()));
-
printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
"unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
active,
@@ -1294,7 +1312,8 @@ void show_free_areas(void)
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
nr_free_pages(),
- global_page_state(NR_SLAB),
+ global_page_state(NR_SLAB_RECLAIMABLE) +
+ global_page_state(NR_SLAB_UNRECLAIMABLE),
global_page_state(NR_FILE_MAPPED),
global_page_state(NR_PAGETABLE));
@@ -1360,39 +1379,25 @@ void show_free_areas(void)
* Add all populated zones of a node to the zonelist.
*/
static int __meminit build_zonelists_node(pg_data_t *pgdat,
- struct zonelist *zonelist, int nr_zones, int zone_type)
+ struct zonelist *zonelist, int nr_zones, enum zone_type zone_type)
{
struct zone *zone;
- BUG_ON(zone_type > ZONE_HIGHMEM);
+ BUG_ON(zone_type >= MAX_NR_ZONES);
+ zone_type++;
do {
+ zone_type--;
zone = pgdat->node_zones + zone_type;
if (populated_zone(zone)) {
-#ifndef CONFIG_HIGHMEM
- BUG_ON(zone_type > ZONE_NORMAL);
-#endif
zonelist->zones[nr_zones++] = zone;
check_highest_zone(zone_type);
}
- zone_type--;
- } while (zone_type >= 0);
+ } while (zone_type);
return nr_zones;
}
-static inline int highest_zone(int zone_bits)
-{
- int res = ZONE_NORMAL;
- if (zone_bits & (__force int)__GFP_HIGHMEM)
- res = ZONE_HIGHMEM;
- if (zone_bits & (__force int)__GFP_DMA32)
- res = ZONE_DMA32;
- if (zone_bits & (__force int)__GFP_DMA)
- res = ZONE_DMA;
- return res;
-}
-
#ifdef CONFIG_NUMA
#define MAX_NODE_LOAD (num_online_nodes())
static int __meminitdata node_load[MAX_NUMNODES];
@@ -1458,13 +1463,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
static void __meminit build_zonelists(pg_data_t *pgdat)
{
- int i, j, k, node, local_node;
+ int j, node, local_node;
+ enum zone_type i;
int prev_node, load;
struct zonelist *zonelist;
nodemask_t used_mask;
/* initialize zonelists */
- for (i = 0; i < GFP_ZONETYPES; i++) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
zonelist = pgdat->node_zonelists + i;
zonelist->zones[0] = NULL;
}
@@ -1494,13 +1500,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
node_load[node] += load;
prev_node = node;
load--;
- for (i = 0; i < GFP_ZONETYPES; i++) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
zonelist = pgdat->node_zonelists + i;
for (j = 0; zonelist->zones[j] != NULL; j++);
- k = highest_zone(i);
-
- j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
zonelist->zones[j] = NULL;
}
}
@@ -1510,17 +1514,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
static void __meminit build_zonelists(pg_data_t *pgdat)
{
- int i, j, k, node, local_node;
+ int node, local_node;
+ enum zone_type i,j;
local_node = pgdat->node_id;
- for (i = 0; i < GFP_ZONETYPES; i++) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
struct zonelist *zonelist;
zonelist = pgdat->node_zonelists + i;
- j = 0;
- k = highest_zone(i);
- j = build_zonelists_node(pgdat, zonelist, j, k);
+ j = build_zonelists_node(pgdat, zonelist, 0, i);
/*
* Now we build the zonelist so that it contains the zones
* of all the other nodes.
@@ -1532,12 +1535,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
for (node = local_node + 1; node < MAX_NUMNODES; node++) {
if (!node_online(node))
continue;
- j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
}
for (node = 0; node < local_node; node++) {
if (!node_online(node))
continue;
- j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+ j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
}
zonelist->zones[j] = NULL;
@@ -1643,7 +1646,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
unsigned long realtotalpages, totalpages = 0;
- int i;
+ enum zone_type i;
for (i = 0; i < MAX_NR_ZONES; i++)
totalpages += zones_size[i];
@@ -1698,8 +1701,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
}
#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr)
-void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
- unsigned long size)
+void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
+ unsigned long pfn, unsigned long size)
{
unsigned long snum = pfn_to_section_nr(pfn);
unsigned long end = pfn_to_section_nr(pfn + size);
@@ -1845,8 +1848,10 @@ static inline void free_zone_pagesets(int cpu)
for_each_zone(zone) {
struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+ /* Free per_cpu_pageset if it is slab allocated */
+ if (pset != &boot_pageset[cpu])
+ kfree(pset);
zone_pcp(zone, cpu) = NULL;
- kfree(pset);
}
}
@@ -1981,7 +1986,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
static void __meminit free_area_init_core(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
- unsigned long j;
+ enum zone_type j;
int nid = pgdat->node_id;
unsigned long zone_start_pfn = pgdat->node_start_pfn;
int ret;
@@ -1999,15 +2004,16 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
if (zholes_size)
realsize -= zholes_size[j];
- if (j < ZONE_HIGHMEM)
+ if (!is_highmem_idx(j))
nr_kernel_pages += realsize;
nr_all_pages += realsize;
zone->spanned_pages = size;
zone->present_pages = realsize;
#ifdef CONFIG_NUMA
- zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
+ zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
/ 100;
+ zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
@@ -2129,7 +2135,7 @@ static void calculate_totalreserve_pages(void)
{
struct pglist_data *pgdat;
unsigned long reserve_pages = 0;
- int i, j;
+ enum zone_type i, j;
for_each_online_pgdat(pgdat) {
for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -2162,7 +2168,7 @@ static void calculate_totalreserve_pages(void)
static void setup_per_zone_lowmem_reserve(void)
{
struct pglist_data *pgdat;
- int j, idx;
+ enum zone_type j, idx;
for_each_online_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -2171,9 +2177,12 @@ static void setup_per_zone_lowmem_reserve(void)
zone->lowmem_reserve[j] = 0;
- for (idx = j-1; idx >= 0; idx--) {
+ idx = j;
+ while (idx) {
struct zone *lower_zone;
+ idx--;
+
if (sysctl_lowmem_reserve_ratio[idx] < 1)
sysctl_lowmem_reserve_ratio[idx] = 1;
@@ -2314,10 +2323,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
return rc;
for_each_zone(zone)
- zone->min_unmapped_ratio = (zone->present_pages *
+ zone->min_unmapped_pages = (zone->present_pages *
sysctl_min_unmapped_ratio) / 100;
return 0;
}
+
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+ struct zone *zone;
+ int rc;
+
+ rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+ for_each_zone(zone)
+ zone->min_slab_pages = (zone->present_pages *
+ sysctl_min_slab_ratio) / 100;
+ return 0;
+}
#endif
/*
diff --git a/mm/page_io.c b/mm/page_io.c
index 88029948d00a..d4840ecbf8f9 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -52,14 +52,29 @@ static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err)
if (bio->bi_size)
return 1;
- if (!uptodate)
+ if (!uptodate) {
SetPageError(page);
+ /*
+ * We failed to write the page out to swap-space.
+ * Re-dirty the page in order to avoid it being reclaimed.
+ * Also print a dire warning that things will go BAD (tm)
+ * very quickly.
+ *
+ * Also clear PG_reclaim to avoid rotate_reclaimable_page()
+ */
+ set_page_dirty(page);
+ printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n",
+ imajor(bio->bi_bdev->bd_inode),
+ iminor(bio->bi_bdev->bd_inode),
+ (unsigned long long)bio->bi_sector);
+ ClearPageReclaim(page);
+ }
end_page_writeback(page);
bio_put(bio);
return 0;
}
-static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
+int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct page *page = bio->bi_io_vec[0].bv_page;
@@ -70,6 +85,10 @@ static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
if (!uptodate) {
SetPageError(page);
ClearPageUptodate(page);
+ printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
+ imajor(bio->bi_bdev->bd_inode),
+ iminor(bio->bi_bdev->bd_inode),
+ (unsigned long long)bio->bi_sector);
} else {
SetPageUptodate(page);
}
@@ -137,10 +156,12 @@ out:
* We use end_swap_bio_read() even for writes, because it happens to do what
* we want.
*/
-int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
+int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+ struct bio **bio_chain)
{
struct bio *bio;
int ret = 0;
+ int bio_rw;
lock_page(page);
@@ -151,11 +172,22 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
goto out;
}
- submit_bio(rw | (1 << BIO_RW_SYNC), bio);
- wait_on_page_locked(page);
-
- if (!PageUptodate(page) || PageError(page))
- ret = -EIO;
+ bio_rw = rw;
+ if (!bio_chain)
+ bio_rw |= (1 << BIO_RW_SYNC);
+ if (bio_chain)
+ bio_get(bio);
+ submit_bio(bio_rw, bio);
+ if (bio_chain == NULL) {
+ wait_on_page_locked(page);
+
+ if (!PageUptodate(page) || PageError(page))
+ ret = -EIO;
+ }
+ if (bio_chain) {
+ bio->bi_private = *bio_chain;
+ *bio_chain = bio;
+ }
out:
return ret;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 40158b59729e..e2155d791d99 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked)
return referenced;
}
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long address;
+ pte_t *pte, entry;
+ spinlock_t *ptl;
+ int ret = 0;
+
+ address = vma_address(page, vma);
+ if (address == -EFAULT)
+ goto out;
+
+ pte = page_check_address(page, mm, address, &ptl);
+ if (!pte)
+ goto out;
+
+ if (!pte_dirty(*pte) && !pte_write(*pte))
+ goto unlock;
+
+ entry = ptep_get_and_clear(mm, address, pte);
+ entry = pte_mkclean(entry);
+ entry = pte_wrprotect(entry);
+ ptep_establish(vma, address, pte, entry);
+ lazy_mmu_prot_update(entry);
+ ret = 1;
+
+unlock:
+ pte_unmap_unlock(pte, ptl);
+out:
+ return ret;
+}
+
+static int page_mkclean_file(struct address_space *mapping, struct page *page)
+{
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ struct vm_area_struct *vma;
+ struct prio_tree_iter iter;
+ int ret = 0;
+
+ BUG_ON(PageAnon(page));
+
+ spin_lock(&mapping->i_mmap_lock);
+ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ if (vma->vm_flags & VM_SHARED)
+ ret += page_mkclean_one(page, vma);
+ }
+ spin_unlock(&mapping->i_mmap_lock);
+ return ret;
+}
+
+int page_mkclean(struct page *page)
+{
+ int ret = 0;
+
+ BUG_ON(!PageLocked(page));
+
+ if (page_mapped(page)) {
+ struct address_space *mapping = page_mapping(page);
+ if (mapping)
+ ret = page_mkclean_file(mapping, page);
+ }
+
+ return ret;
+}
+
/**
* page_set_anon_rmap - setup new anonymous rmap
* @page: the page to add the mapping to
diff --git a/mm/shmem.c b/mm/shmem.c
index db21c51531ca..8631be45b40d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -45,6 +45,7 @@
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
diff --git a/mm/slab.c b/mm/slab.c
index 21ba06035700..7a48eb1a60c8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
int node);
-static void enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep);
static void cache_reap(void *unused);
/*
@@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = {
#endif
};
+#define BAD_ALIEN_MAGIC 0x01020304ul
+
#ifdef CONFIG_LOCKDEP
/*
@@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = {
* The locking for this is tricky in that it nests within the locks
* of all other slabs in a few places; to deal with this special
* locking we put on-slab caches into a separate lock-class.
+ *
+ * We set lock class for alien array caches which are up during init.
+ * The lock annotation will be lost if all cpus of a node go down and
+ * then come back up during hotplug.
*/
-static struct lock_class_key on_slab_key;
+static struct lock_class_key on_slab_l3_key;
+static struct lock_class_key on_slab_alc_key;
+
+static inline void init_lock_keys(void)
-static inline void init_lock_keys(struct cache_sizes *s)
{
int q;
-
- for (q = 0; q < MAX_NUMNODES; q++) {
- if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep))
- continue;
- lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock,
- &on_slab_key);
+ struct cache_sizes *s = malloc_sizes;
+
+ while (s->cs_size != ULONG_MAX) {
+ for_each_node(q) {
+ struct array_cache **alc;
+ int r;
+ struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
+ if (!l3 || OFF_SLAB(s->cs_cachep))
+ continue;
+ lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+ alc = l3->alien;
+ /*
+ * FIXME: This check for BAD_ALIEN_MAGIC
+ * should go away when common slab code is taught to
+ * work even without alien caches.
+ * Currently, non-NUMA code returns BAD_ALIEN_MAGIC
+ * from alloc_alien_cache().
+ */
+ if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+ continue;
+ for_each_node(r) {
+ if (alc[r])
+ lockdep_set_class(&alc[r]->lock,
+ &on_slab_alc_key);
+ }
+ }
+ s++;
}
}
-
#else
-static inline void init_lock_keys(struct cache_sizes *s)
+static inline void init_lock_keys(void)
{
}
#endif
-
-
/* Guard access to the cache-chain. */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
/*
- * vm_enough_memory() looks at this to determine how many slab-allocated pages
- * are possibly freeable under pressure
- *
- * SLAB_RECLAIM_ACCOUNT turns this on per-slab
- */
-atomic_t slab_reclaim_pages;
-
-/*
* chicken and egg problem: delay the per-cpu array allocation
* until the general caches are up.
*/
@@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
return csizep->cs_cachep;
}
-struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
+static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
return __find_general_cachep(size, gfpflags);
}
-EXPORT_SYMBOL(kmem_find_general_cachep);
static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
@@ -1092,7 +1109,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
static inline struct array_cache **alloc_alien_cache(int node, int limit)
{
- return (struct array_cache **) 0x01020304ul;
+ return (struct array_cache **)BAD_ALIEN_MAGIC;
}
static inline void free_alien_cache(struct array_cache **ac_ptr)
@@ -1422,7 +1439,6 @@ void __init kmem_cache_init(void)
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL, NULL);
}
- init_lock_keys(sizes);
sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
sizes->cs_size,
@@ -1491,10 +1507,15 @@ void __init kmem_cache_init(void)
struct kmem_cache *cachep;
mutex_lock(&cache_chain_mutex);
list_for_each_entry(cachep, &cache_chain, next)
- enable_cpucache(cachep);
+ if (enable_cpucache(cachep))
+ BUG();
mutex_unlock(&cache_chain_mutex);
}
+ /* Annotate slab for lockdep -- annotate the malloc caches */
+ init_lock_keys();
+
+
/* Done! */
g_cpucache_up = FULL;
@@ -1551,8 +1572,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
- atomic_add(nr_pages, &slab_reclaim_pages);
- add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
+ add_zone_page_state(page_zone(page),
+ NR_SLAB_RECLAIMABLE, nr_pages);
+ else
+ add_zone_page_state(page_zone(page),
+ NR_SLAB_UNRECLAIMABLE, nr_pages);
for (i = 0; i < nr_pages; i++)
__SetPageSlab(page + i);
return page_address(page);
@@ -1567,7 +1591,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
struct page *page = virt_to_page(addr);
const unsigned long nr_freed = i;
- sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
+ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+ sub_zone_page_state(page_zone(page),
+ NR_SLAB_RECLAIMABLE, nr_freed);
+ else
+ sub_zone_page_state(page_zone(page),
+ NR_SLAB_UNRECLAIMABLE, nr_freed);
while (i--) {
BUG_ON(!PageSlab(page));
__ClearPageSlab(page);
@@ -1576,8 +1605,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
free_pages((unsigned long)addr, cachep->gfporder);
- if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
- atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
}
static void kmem_rcu_free(struct rcu_head *head)
@@ -1834,6 +1861,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
}
}
+static void __kmem_cache_destroy(struct kmem_cache *cachep)
+{
+ int i;
+ struct kmem_list3 *l3;
+
+ for_each_online_cpu(i)
+ kfree(cachep->array[i]);
+
+ /* NUMA: free the list3 structures */
+ for_each_online_node(i) {
+ l3 = cachep->nodelists[i];
+ if (l3) {
+ kfree(l3->shared);
+ free_alien_cache(l3->alien);
+ kfree(l3);
+ }
+ }
+ kmem_cache_free(&cache_cache, cachep);
+}
+
+
/**
* calculate_slab_order - calculate size (page order) of slabs
* @cachep: pointer to the cache that is being created
@@ -1904,12 +1952,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
return left_over;
}
-static void setup_cpu_cache(struct kmem_cache *cachep)
+static int setup_cpu_cache(struct kmem_cache *cachep)
{
- if (g_cpucache_up == FULL) {
- enable_cpucache(cachep);
- return;
- }
+ if (g_cpucache_up == FULL)
+ return enable_cpucache(cachep);
+
if (g_cpucache_up == NONE) {
/*
* Note: the first kmem_cache_create must create the cache
@@ -1956,6 +2003,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
cpu_cache_get(cachep)->touched = 0;
cachep->batchcount = 1;
cachep->limit = BOOT_CPUCACHE_ENTRIES;
+ return 0;
}
/**
@@ -2097,6 +2145,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
} else {
ralign = BYTES_PER_WORD;
}
+
+ /*
+ * Redzoning and user store require word alignment. Note this will be
+ * overridden by architecture or caller mandated alignment if either
+ * is greater than BYTES_PER_WORD.
+ */
+ if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
+ ralign = BYTES_PER_WORD;
+
/* 2) arch mandated alignment: disables debug if necessary */
if (ralign < ARCH_SLAB_MINALIGN) {
ralign = ARCH_SLAB_MINALIGN;
@@ -2110,8 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
}
/*
- * 4) Store it. Note that the debug code below can reduce
- * the alignment to BYTES_PER_WORD.
+ * 4) Store it.
*/
align = ralign;
@@ -2123,20 +2179,19 @@ kmem_cache_create (const char *name, size_t size, size_t align,
#if DEBUG
cachep->obj_size = size;
+ /*
+ * Both debugging options require word-alignment which is calculated
+ * into align above.
+ */
if (flags & SLAB_RED_ZONE) {
- /* redzoning only works with word aligned caches */
- align = BYTES_PER_WORD;
-
/* add space for red zone words */
cachep->obj_offset += BYTES_PER_WORD;
size += 2 * BYTES_PER_WORD;
}
if (flags & SLAB_STORE_USER) {
- /* user store requires word alignment and
- * one word storage behind the end of the real
- * object.
+ /* user store requires one word storage behind the end of
+ * the real object.
*/
- align = BYTES_PER_WORD;
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
@@ -2200,14 +2255,26 @@ kmem_cache_create (const char *name, size_t size, size_t align,
cachep->gfpflags |= GFP_DMA;
cachep->buffer_size = size;
- if (flags & CFLGS_OFF_SLAB)
+ if (flags & CFLGS_OFF_SLAB) {
cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
+ /*
+ * This is a possibility for one of the malloc_sizes caches.
+ * But since we go off slab only for object size greater than
+ * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
+ * this should not happen at all.
+ * But leave a BUG_ON for some lucky dude.
+ */
+ BUG_ON(!cachep->slabp_cache);
+ }
cachep->ctor = ctor;
cachep->dtor = dtor;
cachep->name = name;
-
- setup_cpu_cache(cachep);
+ if (setup_cpu_cache(cachep)) {
+ __kmem_cache_destroy(cachep);
+ cachep = NULL;
+ goto oops;
+ }
/* cache setup completed, link it into the list */
list_add(&cachep->next, &cache_chain);
@@ -2389,9 +2456,6 @@ EXPORT_SYMBOL(kmem_cache_shrink);
*/
int kmem_cache_destroy(struct kmem_cache *cachep)
{
- int i;
- struct kmem_list3 *l3;
-
BUG_ON(!cachep || in_interrupt());
/* Don't let CPUs to come and go */
@@ -2417,25 +2481,23 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
synchronize_rcu();
- for_each_online_cpu(i)
- kfree(cachep->array[i]);
-
- /* NUMA: free the list3 structures */
- for_each_online_node(i) {
- l3 = cachep->nodelists[i];
- if (l3) {
- kfree(l3->shared);
- free_alien_cache(l3->alien);
- kfree(l3);
- }
- }
- kmem_cache_free(&cache_cache, cachep);
+ __kmem_cache_destroy(cachep);
unlock_cpu_hotplug();
return 0;
}
EXPORT_SYMBOL(kmem_cache_destroy);
-/* Get the memory for a slab management obj. */
+/*
+ * Get the memory for a slab management obj.
+ * For a slab cache when the slab descriptor is off-slab, slab descriptors
+ * always come from malloc_sizes caches. The slab descriptor cannot
+ * come from the same cache which is getting created because,
+ * when we are searching for an appropriate cache for these
+ * descriptors in kmem_cache_create, we search through the malloc_sizes array.
+ * If we are creating a malloc_sizes cache here it would not be visible to
+ * kmem_find_general_cachep till the initialization is complete.
+ * Hence we cannot have slabp_cache same as the original cache.
+ */
static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
int colour_off, gfp_t local_flags,
int nodeid)
@@ -3119,6 +3181,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
if (slabp->inuse == 0) {
if (l3->free_objects > l3->free_limit) {
l3->free_objects -= cachep->num;
+ /* No need to drop any previously held
+ * lock here: even if we have an off-slab slab
+ * descriptor, it is guaranteed to come from
+ * a different cache. Refer to the comments before
+ * alloc_slabmgmt().
+ */
slab_destroy(cachep, slabp);
} else {
list_add(&slabp->list, &l3->slabs_free);
@@ -3317,7 +3385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *cachep;
@@ -3326,7 +3394,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node)
return NULL;
return kmem_cache_alloc_node(cachep, flags, node);
}
-EXPORT_SYMBOL(kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node);
#endif
/**
@@ -3370,55 +3438,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
EXPORT_SYMBOL(__kmalloc_track_caller);
#endif
-#ifdef CONFIG_SMP
-/**
- * __alloc_percpu - allocate one copy of the object for every present
- * cpu in the system, zeroing them.
- * Objects should be dereferenced using the per_cpu_ptr macro only.
- *
- * @size: how many bytes of memory are required.
- */
-void *__alloc_percpu(size_t size)
-{
- int i;
- struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
-
- if (!pdata)
- return NULL;
-
- /*
- * Cannot use for_each_online_cpu since a cpu may come online
- * and we have no way of figuring out how to fix the array
- * that we have allocated then....
- */
- for_each_possible_cpu(i) {
- int node = cpu_to_node(i);
-
- if (node_online(node))
- pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
- else
- pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
-
- if (!pdata->ptrs[i])
- goto unwind_oom;
- memset(pdata->ptrs[i], 0, size);
- }
-
- /* Catch derefs w/o wrappers */
- return (void *)(~(unsigned long)pdata);
-
-unwind_oom:
- while (--i >= 0) {
- if (!cpu_possible(i))
- continue;
- kfree(pdata->ptrs[i]);
- }
- kfree(pdata);
- return NULL;
-}
-EXPORT_SYMBOL(__alloc_percpu);
-#endif
-
/**
* kmem_cache_free - Deallocate an object
* @cachep: The cache the allocation was from.
@@ -3464,29 +3483,6 @@ void kfree(const void *objp)
}
EXPORT_SYMBOL(kfree);
-#ifdef CONFIG_SMP
-/**
- * free_percpu - free previously allocated percpu memory
- * @objp: pointer returned by alloc_percpu.
- *
- * Don't free memory not originally allocated by alloc_percpu()
- * The complemented objp is to check for that.
- */
-void free_percpu(const void *objp)
-{
- int i;
- struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
-
- /*
- * We allocate for all cpus so we cannot use for online cpu here.
- */
- for_each_possible_cpu(i)
- kfree(p->ptrs[i]);
- kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-#endif
-
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
return obj_size(cachep);
@@ -3603,22 +3599,26 @@ static void do_ccupdate_local(void *info)
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared)
{
- struct ccupdate_struct new;
- int i, err;
+ struct ccupdate_struct *new;
+ int i;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
- memset(&new.new, 0, sizeof(new.new));
for_each_online_cpu(i) {
- new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
+ new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
batchcount);
- if (!new.new[i]) {
+ if (!new->new[i]) {
for (i--; i >= 0; i--)
- kfree(new.new[i]);
+ kfree(new->new[i]);
+ kfree(new);
return -ENOMEM;
}
}
- new.cachep = cachep;
+ new->cachep = cachep;
- on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1);
+ on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
check_irq_on();
cachep->batchcount = batchcount;
@@ -3626,7 +3626,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
cachep->shared = shared;
for_each_online_cpu(i) {
- struct array_cache *ccold = new.new[i];
+ struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
@@ -3634,18 +3634,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
kfree(ccold);
}
-
- err = alloc_kmemlist(cachep);
- if (err) {
- printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
- cachep->name, -err);
- BUG();
- }
- return 0;
+ kfree(new);
+ return alloc_kmemlist(cachep);
}
/* Called with cache_chain_mutex held always */
-static void enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep)
{
int err;
int limit, shared;
@@ -3697,6 +3691,7 @@ static void enable_cpucache(struct kmem_cache *cachep)
if (err)
printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
cachep->name, -err);
+ return err;
}
/*
@@ -4157,6 +4152,7 @@ static int leaks_show(struct seq_file *m, void *p)
show_symbol(m, n[2*i+2]);
seq_putc(m, '\n');
}
+
return 0;
}
diff --git a/mm/slob.c b/mm/slob.c
index 7b52b20b9607..20188627347c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -339,52 +339,3 @@ void kmem_cache_init(void)
mod_timer(&slob_timer, jiffies + HZ);
}
-
-atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
-EXPORT_SYMBOL(slab_reclaim_pages);
-
-#ifdef CONFIG_SMP
-
-void *__alloc_percpu(size_t size)
-{
- int i;
- struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
-
- if (!pdata)
- return NULL;
-
- for_each_possible_cpu(i) {
- pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
- if (!pdata->ptrs[i])
- goto unwind_oom;
- memset(pdata->ptrs[i], 0, size);
- }
-
- /* Catch derefs w/o wrappers */
- return (void *) (~(unsigned long) pdata);
-
-unwind_oom:
- while (--i >= 0) {
- if (!cpu_possible(i))
- continue;
- kfree(pdata->ptrs[i]);
- }
- kfree(pdata);
- return NULL;
-}
-EXPORT_SYMBOL(__alloc_percpu);
-
-void
-free_percpu(const void *objp)
-{
- int i;
- struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
-
- for_each_possible_cpu(i)
- kfree(p->ptrs[i]);
-
- kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-
-#endif
diff --git a/mm/swap.c b/mm/swap.c
index 687686a61f7c..2e0e871f542f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,6 +34,25 @@
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
+/*
+ * This path almost never happens for VM activity - pages are normally
+ * freed via pagevecs. But it gets used by networking.
+ */
+static void fastcall __page_cache_release(struct page *page)
+{
+ if (PageLRU(page)) {
+ unsigned long flags;
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ VM_BUG_ON(!PageLRU(page));
+ __ClearPageLRU(page);
+ del_page_from_lru(zone, page);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+ }
+ free_hot_page(page);
+}
+
static void put_compound_page(struct page *page)
{
page = (struct page *)page_private(page);
@@ -223,26 +242,6 @@ int lru_add_drain_all(void)
#endif
/*
- * This path almost never happens for VM activity - pages are normally
- * freed via pagevecs. But it gets used by networking.
- */
-void fastcall __page_cache_release(struct page *page)
-{
- if (PageLRU(page)) {
- unsigned long flags;
- struct zone *zone = page_zone(page);
-
- spin_lock_irqsave(&zone->lru_lock, flags);
- BUG_ON(!PageLRU(page));
- __ClearPageLRU(page);
- del_page_from_lru(zone, page);
- spin_unlock_irqrestore(&zone->lru_lock, flags);
- }
- free_hot_page(page);
-}
-EXPORT_SYMBOL(__page_cache_release);
-
-/*
* Batched page_cache_release(). Decrement the reference count on all the
* passed pages. If it fell to zero then remove the page from the LRU and
* free it.
@@ -284,7 +283,7 @@ void release_pages(struct page **pages, int nr, int cold)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
- BUG_ON(!PageLRU(page));
+ VM_BUG_ON(!PageLRU(page));
__ClearPageLRU(page);
del_page_from_lru(zone, page);
}
@@ -337,7 +336,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
if (put_page_testzero(page))
pagevec_add(&pages_to_free, page);
}
@@ -364,7 +363,7 @@ void __pagevec_lru_add(struct pagevec *pvec)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
add_page_to_inactive_list(zone, page);
}
@@ -391,9 +390,9 @@ void __pagevec_lru_add_active(struct pagevec *pvec)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- BUG_ON(PageActive(page));
+ VM_BUG_ON(PageActive(page));
SetPageActive(page);
add_page_to_active_list(zone, page);
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 266162d2ba28..9aad8b0cc6ee 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -24,6 +24,9 @@
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+ int node);
+
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
pte_t *pte;
@@ -478,8 +481,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*/
-void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
- int node)
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+ int node)
{
struct vm_struct *area;
@@ -493,7 +496,6 @@ void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
return __vmalloc_area_node(area, gfp_mask, prot, node);
}
-EXPORT_SYMBOL(__vmalloc_node);
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5d4c4d02254d..87779dda4ec6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -62,6 +62,8 @@ struct scan_control {
int swap_cluster_max;
int swappiness;
+
+ int all_unreclaimable;
};
/*
@@ -377,8 +379,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
int remove_mapping(struct address_space *mapping, struct page *page)
{
- if (!mapping)
- return 0; /* truncate got there first */
+ BUG_ON(!PageLocked(page));
+ BUG_ON(mapping != page_mapping(page));
write_lock_irq(&mapping->tree_lock);
@@ -440,7 +442,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (TestSetPageLocked(page))
goto keep;
- BUG_ON(PageActive(page));
+ VM_BUG_ON(PageActive(page));
sc->nr_scanned++;
@@ -547,7 +549,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto free_it;
}
- if (!remove_mapping(mapping, page))
+ if (!mapping || !remove_mapping(mapping, page))
goto keep_locked;
free_it:
@@ -564,7 +566,7 @@ keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
}
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
@@ -603,7 +605,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
- BUG_ON(!PageLRU(page));
+ VM_BUG_ON(!PageLRU(page));
list_del(&page->lru);
target = src;
@@ -674,7 +676,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
*/
while (!list_empty(&page_list)) {
page = lru_to_page(&page_list);
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
list_del(&page->lru);
if (PageActive(page))
@@ -695,6 +697,11 @@ done:
return nr_reclaimed;
}
+static inline int zone_is_near_oom(struct zone *zone)
+{
+ return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
+}
+
/*
* This moves pages from the active list to the inactive list.
*
@@ -730,6 +737,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
long distress;
long swap_tendency;
+ if (zone_is_near_oom(zone))
+ goto force_reclaim_mapped;
+
/*
* `distress' is a measure of how much trouble we're having
* reclaiming pages. 0 -> no problems. 100 -> great trouble.
@@ -765,6 +775,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
* memory onto the inactive list.
*/
if (swap_tendency >= 100)
+force_reclaim_mapped:
reclaim_mapped = 1;
}
@@ -797,9 +808,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
prefetchw_prev_lru_page(page, &l_inactive, flags);
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- BUG_ON(!PageActive(page));
+ VM_BUG_ON(!PageActive(page));
ClearPageActive(page);
list_move(&page->lru, &zone->inactive_list);
@@ -827,9 +838,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
while (!list_empty(&l_active)) {
page = lru_to_page(&l_active);
prefetchw_prev_lru_page(page, &l_active, flags);
- BUG_ON(PageLRU(page));
+ VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- BUG_ON(!PageActive(page));
+ VM_BUG_ON(!PageActive(page));
list_move(&page->lru, &zone->active_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
@@ -925,6 +936,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
unsigned long nr_reclaimed = 0;
int i;
+ sc->all_unreclaimable = 1;
for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
@@ -941,6 +953,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
+ sc->all_unreclaimable = 0;
+
nr_reclaimed += shrink_zone(priority, zone, sc);
}
return nr_reclaimed;
@@ -1021,6 +1035,9 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
blk_congestion_wait(WRITE, HZ/10);
}
+ /* top priority shrink_caches still had more to do? don't OOM, then */
+ if (!sc.all_unreclaimable)
+ ret = 1;
out:
for (i = 0; zones[i] != 0; i++) {
struct zone *zone = zones[i];
@@ -1153,7 +1170,7 @@ scan:
if (zone->all_unreclaimable)
continue;
if (nr_slab == 0 && zone->pages_scanned >=
- (zone->nr_active + zone->nr_inactive) * 4)
+ (zone->nr_active + zone->nr_inactive) * 6)
zone->all_unreclaimable = 1;
/*
* If we've done a decent amount of scanning and
@@ -1361,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
for_each_zone(zone)
lru_pages += zone->nr_active + zone->nr_inactive;
- nr_slab = global_page_state(NR_SLAB);
+ nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
reclaim_state.reclaimed_slab = 0;
@@ -1510,7 +1527,6 @@ int zone_reclaim_mode __read_mostly;
#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
-#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
/*
* Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1526,6 +1542,12 @@ int zone_reclaim_mode __read_mostly;
int sysctl_min_unmapped_ratio = 1;
/*
+ * If the number of slab pages in a zone grows beyond this percentage then
+ * slab reclaim needs to occur.
+ */
+int sysctl_min_slab_ratio = 5;
+
+/*
* Try to free up some pages from this zone through reclaim.
*/
static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1544,6 +1566,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
.gfp_mask = gfp_mask,
.swappiness = vm_swappiness,
};
+ unsigned long slab_reclaimable;
disable_swap_token();
cond_resched();
@@ -1556,29 +1579,43 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- /*
- * Free memory by calling shrink zone with increasing priorities
- * until we have enough memory freed.
- */
- priority = ZONE_RECLAIM_PRIORITY;
- do {
- nr_reclaimed += shrink_zone(priority, zone, &sc);
- priority--;
- } while (priority >= 0 && nr_reclaimed < nr_pages);
+ if (zone_page_state(zone, NR_FILE_PAGES) -
+ zone_page_state(zone, NR_FILE_MAPPED) >
+ zone->min_unmapped_pages) {
+ /*
+ * Free memory by calling shrink zone with increasing
+ * priorities until we have enough memory freed.
+ */
+ priority = ZONE_RECLAIM_PRIORITY;
+ do {
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
+ priority--;
+ } while (priority >= 0 && nr_reclaimed < nr_pages);
+ }
- if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+ slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+ if (slab_reclaimable > zone->min_slab_pages) {
/*
* shrink_slab() does not currently allow us to determine how
- * many pages were freed in this zone. So we just shake the slab
- * a bit and then go off node for this particular allocation
- * despite possibly having freed enough memory to allocate in
- * this zone. If we freed local memory then the next
- * allocations will be local again.
+ * many pages were freed in this zone. So we take the current
+ * number of slab pages and shake the slab until it is reduced
+ * by the same nr_pages that we used for reclaiming unmapped
+ * pages.
*
- * shrink_slab will free memory on all zones and may take
- * a long time.
+ * Note that shrink_slab will free memory on all zones and may
+ * take a long time.
+ */
+ while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
+ zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
+ slab_reclaimable - nr_pages)
+ ;
+
+ /*
+ * Update nr_reclaimed by the number of slab pages we
+ * reclaimed from this zone.
*/
- shrink_slab(sc.nr_scanned, gfp_mask, order);
+ nr_reclaimed += slab_reclaimable -
+ zone_page_state(zone, NR_SLAB_RECLAIMABLE);
}
p->reclaim_state = NULL;
@@ -1592,7 +1629,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
int node_id;
/*
- * Zone reclaim reclaims unmapped file backed pages.
+ * Zone reclaim reclaims unmapped file backed pages and
+ * slab pages if we are over the defined limits.
*
* A small portion of unmapped file backed pages is needed for
* file I/O otherwise pages read by file I/O will be immediately
@@ -1601,7 +1639,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* unmapped file backed pages.
*/
if (zone_page_state(zone, NR_FILE_PAGES) -
- zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
+ zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
+ && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
+ <= zone->min_slab_pages)
return 0;
/*
@@ -1621,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* over remote processors and spread off node memory allocations
* as wide as possible.
*/
- node_id = zone->zone_pgdat->node_id;
+ node_id = zone_to_nid(zone);
mask = node_to_cpumask(node_id);
if (!cpus_empty(mask) && node_id != numa_node_id())
return 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c1b5f4106b38..490d8c1a0ded 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -321,6 +321,9 @@ void refresh_cpu_vm_stats(int cpu)
for_each_zone(zone) {
struct per_cpu_pageset *pcp;
+ if (!populated_zone(zone))
+ continue;
+
pcp = zone_pcp(zone, cpu);
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
@@ -435,12 +438,28 @@ struct seq_operations fragmentation_op = {
.show = frag_show,
};
+#ifdef CONFIG_ZONE_DMA32
+#define TEXT_FOR_DMA32(xx) xx "_dma32",
+#else
+#define TEXT_FOR_DMA32(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define TEXT_FOR_HIGHMEM(xx) xx "_high",
+#else
+#define TEXT_FOR_HIGHMEM(xx)
+#endif
+
+#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \
+ TEXT_FOR_HIGHMEM(xx)
+
static char *vmstat_text[] = {
/* Zoned VM counters */
"nr_anon_pages",
"nr_mapped",
"nr_file_pages",
- "nr_slab",
+ "nr_slab_reclaimable",
+ "nr_slab_unreclaimable",
"nr_page_table_pages",
"nr_dirty",
"nr_writeback",
@@ -462,10 +481,7 @@ static char *vmstat_text[] = {
"pswpin",
"pswpout",
- "pgalloc_dma",
- "pgalloc_dma32",
- "pgalloc_normal",
- "pgalloc_high",
+ TEXTS_FOR_ZONES("pgalloc")
"pgfree",
"pgactivate",
@@ -474,25 +490,10 @@ static char *vmstat_text[] = {
"pgfault",
"pgmajfault",
- "pgrefill_dma",
- "pgrefill_dma32",
- "pgrefill_normal",
- "pgrefill_high",
-
- "pgsteal_dma",
- "pgsteal_dma32",
- "pgsteal_normal",
- "pgsteal_high",
-
- "pgscan_kswapd_dma",
- "pgscan_kswapd_dma32",
- "pgscan_kswapd_normal",
- "pgscan_kswapd_high",
-
- "pgscan_direct_dma",
- "pgscan_direct_dma32",
- "pgscan_direct_normal",
- "pgscan_direct_high",
+ TEXTS_FOR_ZONES("pgrefill")
+ TEXTS_FOR_ZONES("pgsteal")
+ TEXTS_FOR_ZONES("pgscan_kswapd")
+ TEXTS_FOR_ZONES("pgscan_direct")
"pginodesteal",
"slabs_scanned",
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 859e3359fcda..e2a095d0fd80 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -40,6 +40,22 @@ config IP_DCCP_DEBUG
Just say N.
+config NET_DCCPPROBE
+ tristate "DCCP connection probing"
+ depends on PROC_FS && KPROBES
+ ---help---
+ This module allows for capturing the changes to DCCP connection
+ state in response to incoming packets. It is used for debugging
+ DCCP congestion avoidance modules. If you don't understand
+ what was just said, you don't need it: say N.
+
+ Documentation on how to use DCCP connection probing can be found
+ at http://linux-net.osdl.org/index.php/DccpProbe
+
+ To compile this code as a module, choose M here: the
+ module will be called dccp_probe.
+
+
endmenu
endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 7696e219b05d..17ed99c46617 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
+obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
dccp-$(CONFIG_SYSCTL) += sysctl.o
dccp_diag-y := diag.o
+dccp_probe-y := probe.o
obj-y += ccids/
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 457dd3db7f41..2efb505aeb35 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -808,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
static struct ccid_operations ccid2 = {
- .ccid_id = 2,
+ .ccid_id = DCCPC_CCID2,
.ccid_name = "ccid2",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 195aa9566228..67d2dc0e7c67 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1240,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
}
static struct ccid_operations ccid3 = {
- .ccid_id = 3,
+ .ccid_id = DCCPC_CCID3,
.ccid_name = "ccid3",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9a1a76a7dc41..66be29b6f508 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -56,9 +56,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
dp->dccps_role = DCCP_ROLE_CLIENT;
- if (dccp_service_not_initialized(sk))
- return -EPROTO;
-
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
new file mode 100644
index 000000000000..146496fce2e2
--- /dev/null
+++ b/net/dccp/probe.c
@@ -0,0 +1,198 @@
+/*
+ * dccp_probe - Observe the DCCP flow with kprobes.
+ *
+ * The idea for this came from Werner Almesberger's umlsim
+ * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
+ *
+ * Modified for DCCP from Stephen Hemminger's code
+ * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/dccp.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/vmalloc.h>
+
+#include "dccp.h"
+#include "ccid.h"
+#include "ccids/ccid3.h"
+
+static int port;
+
+static int bufsize = 64 * 1024;
+
+static const char procname[] = "dccpprobe";
+
+/* Shared state between the probe handler and the /proc reader. */
+struct {
+ struct kfifo *fifo; /* log records queued by printl(), drained by read */
+ spinlock_t lock; /* lock handed to kfifo_alloc() for the fifo */
+ wait_queue_head_t wait; /* readers sleep here until the fifo is non-empty */
+ struct timeval tstart; /* time base; reset on every open of the proc file */
+} dccpw;
+
+/*
+ * Format one log record and queue it for the reader.
+ *
+ * The record is prefixed with the time elapsed since dccpw.tstart as
+ * "sec.usec ", followed by the caller's printf-style message. The whole
+ * record is built in a 256-byte stack buffer, so longer messages are
+ * truncated by vscnprintf(). The record is then pushed into dccpw.fifo and
+ * any task sleeping on dccpw.wait is woken.
+ */
+static void printl(const char *fmt, ...)
+{
+ va_list args;
+ int len;
+ struct timeval now;
+ char tbuf[256];
+
+ va_start(args, fmt);
+ do_gettimeofday(&now);
+
+ /* Convert the absolute time into an offset from dccpw.tstart. */
+ now.tv_sec -= dccpw.tstart.tv_sec;
+ now.tv_usec -= dccpw.tstart.tv_usec;
+ if (now.tv_usec < 0) {
+ --now.tv_sec;
+ now.tv_usec += 1000000;
+ }
+
+ len = sprintf(tbuf, "%lu.%06lu ",
+ (unsigned long) now.tv_sec,
+ (unsigned long) now.tv_usec);
+ len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
+ va_end(args);
+
+ kfifo_put(dccpw.fifo, tbuf, len);
+ wake_up(&dccpw.wait);
+}
+
+/*
+ * jprobe handler installed on dccp_sendmsg(); it takes the same arguments.
+ *
+ * When the module's port filter is 0, or matches either end of the
+ * connection, log one record with source/destination address:port and the
+ * message size. For sockets whose TX CCID is CCID3 the record also carries
+ * the sender's s, rtt, p and t_ipi values.
+ *
+ * Must end with jprobe_return(); the trailing "return 0" is never reached.
+ */
+static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
+			 struct msghdr *msg, size_t size)
+{
+	const struct dccp_minisock *dmsk = dccp_msk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ccid3_hc_tx_sock *hctx;
+
+	if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
+		hctx = ccid3_hc_tx_sk(sk);
+	else
+		hctx = NULL;
+
+	if (port == 0 || ntohs(inet->dport) == port ||
+	    ntohs(inet->sport) == port) {
+		/* size is a size_t: print it with %zu, not %d. */
+		if (hctx)
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %zu %d %d %d %d\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+			       hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+		else
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %zu\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+	}
+
+	jprobe_return();
+	return 0;
+}
+
+static struct jprobe dccp_send_probe = {
+ .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, },
+ .entry = (kprobe_opcode_t *)&jdccp_sendmsg,
+};
+
+/*
+ * open() handler for the proc file: discard anything still queued in the
+ * fifo and restart the time base used for record timestamps.
+ */
+static int dccpprobe_open(struct inode *inode, struct file *file)
+{
+ kfifo_reset(dccpw.fifo);
+ do_gettimeofday(&dccpw.tstart);
+ return 0;
+}
+
+/*
+ * read() handler for the proc file.
+ *
+ * Sleeps until the fifo holds data, then copies up to @len bytes of queued
+ * log text to user space.
+ *
+ * Returns the number of bytes delivered, 0 for a zero-length read, -EINVAL
+ * for a NULL buffer, -ENOMEM if the bounce buffer cannot be allocated,
+ * -EFAULT if the user buffer cannot be written, or the error returned by
+ * wait_event_interruptible() when the sleep is interrupted.
+ */
+static ssize_t dccpprobe_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+	unsigned char *tbuf;
+
+	/* len is unsigned, so only the pointer needs validating. */
+	if (!buf)
+		return -EINVAL;
+
+	if (len == 0)
+		return 0;
+
+	tbuf = vmalloc(len);
+	if (!tbuf)
+		return -ENOMEM;
+
+	error = wait_event_interruptible(dccpw.wait,
+					 __kfifo_len(dccpw.fifo) != 0);
+	if (error)
+		goto out_free;
+
+	cnt = kfifo_get(dccpw.fifo, tbuf, len);
+	/*
+	 * copy_to_user() returns the number of bytes it could NOT copy,
+	 * not an errno; translate any shortfall into -EFAULT.
+	 */
+	if (copy_to_user(buf, tbuf, cnt))
+		error = -EFAULT;
+
+out_free:
+	vfree(tbuf);
+
+	return error ? error : cnt;
+}
+
+static struct file_operations dccpprobe_fops = {
+ .owner = THIS_MODULE,
+ .open = dccpprobe_open,
+ .read = dccpprobe_read,
+};
+
+/*
+ * Module init: allocate the log fifo, create the proc entry and install
+ * the jprobe on dccp_sendmsg().
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far is torn down again.
+ */
+static __init int dccpprobe_init(void)
+{
+	int ret = -ENOMEM;
+
+	init_waitqueue_head(&dccpw.wait);
+	spin_lock_init(&dccpw.lock);
+	dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
+	/* kfifo_alloc() reports failure via ERR_PTR(); nothing to undo yet. */
+	if (IS_ERR(dccpw.fifo))
+		return PTR_ERR(dccpw.fifo);
+
+	if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
+		goto err0;
+
+	ret = register_jprobe(&dccp_send_probe);
+	if (ret)
+		goto err1;
+
+	pr_info("DCCP watch registered (port=%d)\n", port);
+	return 0;
+err1:
+	proc_net_remove(procname);
+err0:
+	kfifo_free(dccpw.fifo);
+	return ret;
+}
+module_init(dccpprobe_init);
+
+/*
+ * Module exit: tear down in the reverse order of dependence. The proc
+ * entry and the jprobe both use dccpw.fifo, so they are removed first and
+ * the fifo is freed only once nothing can reach it any more.
+ */
+static __exit void dccpprobe_exit(void)
+{
+	proc_net_remove(procname);
+	unregister_jprobe(&dccp_send_probe);
+	kfifo_free(dccpw.fifo);
+}
+module_exit(dccpprobe_exit);
+
+MODULE_PARM_DESC(port, "Port to match (0=all)");
+module_param(port, int, 0);
+
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+module_param(bufsize, int, 0);
+
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
+MODULE_DESCRIPTION("DCCP snooper");
+MODULE_LICENSE("GPL");
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 962df0ea31aa..72cbdcfc2c65 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -217,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
- dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
+ dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
return 0;
@@ -267,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk)
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
- /*
- * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
- * before calling listen()
- */
- if (dccp_service_not_initialized(sk))
- return -EPROTO;
return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
@@ -540,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len,
int err = -ENOENT, slen = 0, total_len = sizeof(u32);
lock_sock(sk);
- if (dccp_service_not_initialized(sk))
- goto out;
-
if ((sl = dp->dccps_service_list) != NULL) {
slen = sl->dccpsl_nr * sizeof(u32);
total_len += slen;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 1650b64415aa..30af4a4dfcc8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -448,24 +448,22 @@ config INET_TCP_DIAG
depends on INET_DIAG
def_tristate INET_DIAG
-config TCP_CONG_ADVANCED
+menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---
Support for selection of various TCP congestion control
modules.
Nearly all users can safely say no here, and a safe default
- selection will be made (BIC-TCP with new Reno as a fallback).
+ selection will be made (CUBIC with new Reno as a fallback).
If unsure, say N.
-# TCP Reno is builtin (required as fallback)
-menu "TCP congestion control"
- depends on TCP_CONG_ADVANCED
+if TCP_CONG_ADVANCED
config TCP_CONG_BIC
tristate "Binary Increase Congestion (BIC) control"
- default y
+ default m
---help---
BIC-TCP is a sender-side only change that ensures a linear RTT
fairness under large windows while offering both scalability and
@@ -479,7 +477,7 @@ config TCP_CONG_BIC
config TCP_CONG_CUBIC
tristate "CUBIC TCP"
- default m
+ default y
---help---
This is version 2.0 of BIC-TCP which uses a cubic growth function
among other techniques.
@@ -574,12 +572,49 @@ config TCP_CONG_VENO
loss packets.
See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
-endmenu
+choice
+ prompt "Default TCP congestion control"
+ default DEFAULT_CUBIC
+ help
+ Select the TCP congestion control that will be used by default
+ for all connections.
-config TCP_CONG_BIC
+ config DEFAULT_BIC
+ bool "Bic" if TCP_CONG_BIC=y
+
+ config DEFAULT_CUBIC
+ bool "Cubic" if TCP_CONG_CUBIC=y
+
+ config DEFAULT_HTCP
+ bool "Htcp" if TCP_CONG_HTCP=y
+
+ config DEFAULT_VEGAS
+ bool "Vegas" if TCP_CONG_VEGAS=y
+
+ config DEFAULT_WESTWOOD
+ bool "Westwood" if TCP_CONG_WESTWOOD=y
+
+ config DEFAULT_RENO
+ bool "Reno"
+
+endchoice
+
+endif
+
+config TCP_CONG_CUBIC
tristate
depends on !TCP_CONG_ADVANCED
default y
+config DEFAULT_TCP_CONG
+ string
+ default "bic" if DEFAULT_BIC
+ default "cubic" if DEFAULT_CUBIC
+ default "htcp" if DEFAULT_HTCP
+ default "vegas" if DEFAULT_VEGAS
+ default "westwood" if DEFAULT_WESTWOOD
+ default "reno" if DEFAULT_RENO
+ default "cubic"
+
source "net/ipv4/ipvs/Kconfig"
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 80a2a0911b49..e6ce0b3ba62a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -259,7 +259,7 @@ void cipso_v4_cache_invalidate(void)
u32 iter;
for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
- spin_lock(&cipso_v4_cache[iter].lock);
+ spin_lock_bh(&cipso_v4_cache[iter].lock);
list_for_each_entry_safe(entry,
tmp_entry,
&cipso_v4_cache[iter].list, list) {
@@ -267,7 +267,7 @@ void cipso_v4_cache_invalidate(void)
cipso_v4_cache_entry_free(entry);
}
cipso_v4_cache[iter].size = 0;
- spin_unlock(&cipso_v4_cache[iter].lock);
+ spin_unlock_bh(&cipso_v4_cache[iter].lock);
}
return;
@@ -309,7 +309,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
hash = cipso_v4_map_cache_hash(key, key_len);
bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
- spin_lock(&cipso_v4_cache[bkt].lock);
+ spin_lock_bh(&cipso_v4_cache[bkt].lock);
list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
if (entry->hash == hash &&
entry->key_len == key_len &&
@@ -318,7 +318,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
secattr->cache.free = entry->lsm_data.free;
secattr->cache.data = entry->lsm_data.data;
if (prev_entry == NULL) {
- spin_unlock(&cipso_v4_cache[bkt].lock);
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
return 0;
}
@@ -333,12 +333,12 @@ static int cipso_v4_cache_check(const unsigned char *key,
&prev_entry->list);
}
- spin_unlock(&cipso_v4_cache[bkt].lock);
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
return 0;
}
prev_entry = entry;
}
- spin_unlock(&cipso_v4_cache[bkt].lock);
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
return -ENOENT;
}
@@ -387,7 +387,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
entry->lsm_data.data = secattr->cache.data;
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
- spin_lock(&cipso_v4_cache[bkt].lock);
+ spin_lock_bh(&cipso_v4_cache[bkt].lock);
if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache[bkt].size += 1;
@@ -398,7 +398,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache_entry_free(old_entry);
}
- spin_unlock(&cipso_v4_cache[bkt].lock);
+ spin_unlock_bh(&cipso_v4_cache[bkt].lock);
return 0;
@@ -530,197 +530,42 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
}
/**
- * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
- * @headroom: the amount of headroom to allocate for the sk_buff
+ * cipso_v4_doi_walk - Iterate through the DOI definitions
+ * @skip_cnt: skip past this number of DOI definitions, updated
+ * @callback: callback for each DOI definition
+ * @cb_arg: argument for the callback function
*
* Description:
- * Dump a list of all the configured DOI values into a sk_buff. The returned
- * sk_buff has room at the front of the sk_buff for @headroom bytes. See
- * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This
- * function may fail if another process is changing the DOI list at the same
- * time. Returns a pointer to a sk_buff on success, NULL on error.
+ * Iterate over the DOI definition list, skipping the first @skip_cnt entries.
+ * For each entry call @callback, if @callback returns a negative value stop
+ * 'walking' through the list and return. Updates the value in @skip_cnt upon
+ * return. Returns zero on success, negative values on failure.
*
*/
-struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+int cipso_v4_doi_walk(u32 *skip_cnt,
+ int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
+ void *cb_arg)
{
- struct sk_buff *skb = NULL;
- struct cipso_v4_doi *iter;
+ int ret_val = -ENOENT;
u32 doi_cnt = 0;
- ssize_t buf_len;
+ struct cipso_v4_doi *iter_doi;
- buf_len = NETLBL_LEN_U32;
rcu_read_lock();
- list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
- if (iter->valid) {
- doi_cnt += 1;
- buf_len += 2 * NETLBL_LEN_U32;
- }
-
- skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
- if (skb == NULL)
- goto doi_dump_all_failure;
-
- if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
- goto doi_dump_all_failure;
- buf_len -= NETLBL_LEN_U32;
- list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
- if (iter->valid) {
- if (buf_len < 2 * NETLBL_LEN_U32)
- goto doi_dump_all_failure;
- if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
- goto doi_dump_all_failure;
- if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
- goto doi_dump_all_failure;
- buf_len -= 2 * NETLBL_LEN_U32;
- }
- rcu_read_unlock();
-
- return skb;
-
-doi_dump_all_failure:
- rcu_read_unlock();
- kfree(skb);
- return NULL;
-}
-
-/**
- * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
- * @doi: the DOI value
- * @headroom: the amount of headroom to allocate for the sk_buff
- *
- * Description:
- * Lookup the DOI definition matching @doi and dump it's contents into a
- * sk_buff. The returned sk_buff has room at the front of the sk_buff for
- * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message
- * format. This function may fail if another process is changing the DOI list
- * at the same time. Returns a pointer to a sk_buff on success, NULL on error.
- *
- */
-struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
-{
- struct sk_buff *skb = NULL;
- struct cipso_v4_doi *iter;
- u32 tag_cnt = 0;
- u32 lvl_cnt = 0;
- u32 cat_cnt = 0;
- ssize_t buf_len;
- ssize_t tmp;
-
- rcu_read_lock();
- iter = cipso_v4_doi_getdef(doi);
- if (iter == NULL)
- goto doi_dump_failure;
- buf_len = NETLBL_LEN_U32;
- switch (iter->type) {
- case CIPSO_V4_MAP_PASS:
- buf_len += NETLBL_LEN_U32;
- while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
- iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
- tag_cnt += 1;
- buf_len += NETLBL_LEN_U8;
- }
- break;
- case CIPSO_V4_MAP_STD:
- buf_len += 3 * NETLBL_LEN_U32;
- while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
- iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
- tag_cnt += 1;
- buf_len += NETLBL_LEN_U8;
- }
- for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
- if (iter->map.std->lvl.local[tmp] !=
- CIPSO_V4_INV_LVL) {
- lvl_cnt += 1;
- buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
- }
- for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
- if (iter->map.std->cat.local[tmp] !=
- CIPSO_V4_INV_CAT) {
- cat_cnt += 1;
- buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
+ list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
+ if (iter_doi->valid) {
+ if (doi_cnt++ < *skip_cnt)
+ continue;
+ ret_val = callback(iter_doi, cb_arg);
+ if (ret_val < 0) {
+ doi_cnt--;
+ goto doi_walk_return;
}
- break;
- }
-
- skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
- if (skb == NULL)
- goto doi_dump_failure;
-
- if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
- goto doi_dump_failure;
- buf_len -= NETLBL_LEN_U32;
- if (iter != cipso_v4_doi_getdef(doi))
- goto doi_dump_failure;
- switch (iter->type) {
- case CIPSO_V4_MAP_PASS:
- if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
- goto doi_dump_failure;
- buf_len -= NETLBL_LEN_U32;
- for (tmp = 0;
- tmp < CIPSO_V4_TAG_MAXCNT &&
- iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
- tmp++) {
- if (buf_len < NETLBL_LEN_U8)
- goto doi_dump_failure;
- if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
- goto doi_dump_failure;
- buf_len -= NETLBL_LEN_U8;
}
- break;
- case CIPSO_V4_MAP_STD:
- if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
- goto doi_dump_failure;
- if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
- goto doi_dump_failure;
- if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
- goto doi_dump_failure;
- buf_len -= 3 * NETLBL_LEN_U32;
- for (tmp = 0;
- tmp < CIPSO_V4_TAG_MAXCNT &&
- iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
- tmp++) {
- if (buf_len < NETLBL_LEN_U8)
- goto doi_dump_failure;
- if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
- goto doi_dump_failure;
- buf_len -= NETLBL_LEN_U8;
- }
- for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
- if (iter->map.std->lvl.local[tmp] !=
- CIPSO_V4_INV_LVL) {
- if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
- goto doi_dump_failure;
- if (nla_put_u32(skb, NLA_U32, tmp) != 0)
- goto doi_dump_failure;
- if (nla_put_u8(skb,
- NLA_U8,
- iter->map.std->lvl.local[tmp]) != 0)
- goto doi_dump_failure;
- buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
- }
- for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
- if (iter->map.std->cat.local[tmp] !=
- CIPSO_V4_INV_CAT) {
- if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
- goto doi_dump_failure;
- if (nla_put_u32(skb, NLA_U32, tmp) != 0)
- goto doi_dump_failure;
- if (nla_put_u16(skb,
- NLA_U16,
- iter->map.std->cat.local[tmp]) != 0)
- goto doi_dump_failure;
- buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
- }
- break;
- }
- rcu_read_unlock();
-
- return skb;
-doi_dump_failure:
+doi_walk_return:
rcu_read_unlock();
- kfree(skb);
- return NULL;
+ *skip_cnt = doi_cnt;
+ return ret_val;
}
/**
@@ -1486,43 +1331,40 @@ socket_setattr_failure:
}
/**
- * cipso_v4_socket_getattr - Get the security attributes from a socket
- * @sock: the socket
+ * cipso_v4_sock_getattr - Get the security attributes from a sock
+ * @sk: the sock
* @secattr: the security attributes
*
* Description:
- * Query @sock to see if there is a CIPSO option attached to the socket and if
- * there is return the CIPSO security attributes in @secattr. Returns zero on
- * success and negative values on failure.
+ * Query @sk to see if there is a CIPSO option attached to the sock and if
+ * there is return the CIPSO security attributes in @secattr. This function
+ * requires that @sk be locked, or privately held, but it does not do any
+ * locking itself. Returns zero on success and negative values on failure.
*
*/
-int cipso_v4_socket_getattr(const struct socket *sock,
- struct netlbl_lsm_secattr *secattr)
+int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
{
int ret_val = -ENOMSG;
- struct sock *sk;
struct inet_sock *sk_inet;
unsigned char *cipso_ptr;
u32 doi;
struct cipso_v4_doi *doi_def;
- sk = sock->sk;
- lock_sock(sk);
sk_inet = inet_sk(sk);
if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
- goto socket_getattr_return;
+ return -ENOMSG;
cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
sizeof(struct iphdr);
ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
if (ret_val == 0)
- goto socket_getattr_return;
+ return ret_val;
doi = ntohl(*(u32 *)&cipso_ptr[2]);
rcu_read_lock();
doi_def = cipso_v4_doi_getdef(doi);
if (doi_def == NULL) {
rcu_read_unlock();
- goto socket_getattr_return;
+ return -ENOMSG;
}
switch (cipso_ptr[6]) {
case CIPSO_V4_TAG_RBITMAP:
@@ -1533,8 +1375,29 @@ int cipso_v4_socket_getattr(const struct socket *sock,
}
rcu_read_unlock();
-socket_getattr_return:
- release_sock(sk);
+ return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sock to see if there is a CIPSO option attached to the socket and if
+ * there is return the CIPSO security attributes in @secattr. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+
+ /*
+ * cipso_v4_sock_getattr() does no locking of its own, so hold the
+ * sock lock across the call.
+ */
+ lock_sock(sock->sk);
+ ret_val = cipso_v4_sock_getattr(sock->sk, secattr);
+ release_sock(sock->sk);
+
+ return ret_val;
+}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 19b2071ff319..e82a5be894b5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
return ret;
}
+/*
+ * Hand the Kconfig-selected name (CONFIG_DEFAULT_TCP_CONG) to
+ * tcp_set_default_congestion_control() and return its result.
+ */
+static int __init tcp_congestion_default(void)
+{
+ return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+}
+
+late_initcall(tcp_congestion_default);
ctl_table ipv4_table[] = {
{
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 7ff2e4273a7c..af0aca1e6be6 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
ret = -EEXIST;
} else {
- list_add_rcu(&ca->list, &tcp_cong_list);
+ list_add_tail_rcu(&ca->list, &tcp_cong_list);
printk(KERN_INFO "TCP %s registered\n", ca->name);
}
spin_unlock(&tcp_cong_list_lock);
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index fe23cb7f1e87..9f7121ae13e9 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -9,6 +9,9 @@ config NETLABEL
---help---
NetLabel provides support for explicit network packet labeling
protocols such as CIPSO and RIPSO. For more information see
- Documentation/netlabel.
+ Documentation/netlabel as well as the NetLabel SourceForge project
+ for configuration tools and additional documentation.
+
+ * http://netlabel.sf.net
If you are unsure, say N.
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index a4f40adc447b..4125a55f469f 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -41,15 +41,37 @@
#include "netlabel_user.h"
#include "netlabel_cipso_v4.h"
+/* Argument struct for cipso_v4_doi_walk() */
+struct netlbl_cipsov4_doiwalk_arg {
+ struct netlink_callback *nl_cb;
+ struct sk_buff *skb;
+ u32 seq;
+};
+
/* NetLabel Generic NETLINK CIPSOv4 family */
static struct genl_family netlbl_cipsov4_gnl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = NETLBL_NLTYPE_CIPSOV4_NAME,
.version = NETLBL_PROTO_VERSION,
- .maxattr = 0,
+ .maxattr = NLBL_CIPSOV4_A_MAX,
};
+/* NetLabel Netlink attribute policy */
+static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
+ [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 },
+ [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 },
+ [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED },
+ [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED },
+};
/*
* Helper Functions
@@ -81,6 +103,41 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
kfree(ptr);
}
+/**
+ * netlbl_cipsov4_add_common - Parse the common sections of an ADD message
+ * @info: the Generic NETLINK info block
+ * @doi_def: the CIPSO V4 DOI definition
+ *
+ * Description:
+ * Parse the DOI value and the tag list of an ADD message into @doi_def; no
+ * more than CIPSO_V4_TAG_MAXCNT tags are accepted and a shorter list is
+ * terminated with CIPSO_V4_TAG_INVALID. Returns zero or -EINVAL on failure.
+ */
+static int netlbl_cipsov4_add_common(struct genl_info *info,
+ struct cipso_v4_doi *doi_def)
+{
+ struct nlattr *nla;
+ int nla_rem;
+ u32 iter = 0;
+
+ doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+
+ if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST],
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ return -EINVAL;
+
+ nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem)
+ if (nla->nla_type == NLBL_CIPSOV4_A_TAG) {
+ if (iter >= CIPSO_V4_TAG_MAXCNT)
+ return -EINVAL;
+ doi_def->tags[iter++] = nla_get_u8(nla);
+ }
+ if (iter < CIPSO_V4_TAG_MAXCNT)
+ doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+ return 0;
+}
/*
* NetLabel Command Handlers
@@ -88,9 +145,7 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
/**
* netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
- * @doi: the DOI value
- * @msg: the ADD message data
- * @msg_size: the size of the ADD message buffer
+ * @info: the Generic NETLINK info block
*
* Description:
* Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
@@ -98,29 +153,28 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
* error.
*
*/
-static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
+static int netlbl_cipsov4_add_std(struct genl_info *info)
{
int ret_val = -EINVAL;
- int msg_len = msg_size;
- u32 num_tags;
- u32 num_lvls;
- u32 num_cats;
struct cipso_v4_doi *doi_def = NULL;
- u32 iter;
- u32 tmp_val_a;
- u32 tmp_val_b;
+ struct nlattr *nla_a;
+ struct nlattr *nla_b;
+ int nla_a_rem;
+ int nla_b_rem;
- if (msg_len < NETLBL_LEN_U32)
- goto add_std_failure;
- num_tags = netlbl_getinc_u32(&msg, &msg_len);
- if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
- goto add_std_failure;
+ if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
+ !info->attrs[NLBL_CIPSOV4_A_TAGLST] ||
+ !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST])
+ return -EINVAL;
+
+ if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ return -EINVAL;
doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
- if (doi_def == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
- }
+ if (doi_def == NULL)
+ return -ENOMEM;
doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
if (doi_def->map.std == NULL) {
ret_val = -ENOMEM;
@@ -128,28 +182,32 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
}
doi_def->type = CIPSO_V4_MAP_STD;
- for (iter = 0; iter < num_tags; iter++) {
- if (msg_len < NETLBL_LEN_U8)
- goto add_std_failure;
- doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
- switch (doi_def->tags[iter]) {
- case CIPSO_V4_TAG_RBITMAP:
- break;
- default:
- goto add_std_failure;
- }
- }
- if (iter < CIPSO_V4_TAG_MAXCNT)
- doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
-
- if (msg_len < 6 * NETLBL_LEN_U32)
+ ret_val = netlbl_cipsov4_add_common(info, doi_def);
+ if (ret_val != 0)
goto add_std_failure;
- num_lvls = netlbl_getinc_u32(&msg, &msg_len);
- if (num_lvls == 0)
- goto add_std_failure;
- doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len);
- if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS)
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) {
+ nla_for_each_nested(nla_b, nla_a, nla_b_rem)
+ switch (nla_b->nla_type) {
+ case NLBL_CIPSOV4_A_MLSLVLLOC:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->lvl.local_size)
+ doi_def->map.std->lvl.local_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ case NLBL_CIPSOV4_A_MLSLVLREM:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->lvl.cipso_size)
+ doi_def->map.std->lvl.cipso_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ }
+ }
+ if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS ||
+ doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
goto add_std_failure;
doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
sizeof(u32),
@@ -158,9 +216,6 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
ret_val = -ENOMEM;
goto add_std_failure;
}
- doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len);
- if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
- goto add_std_failure;
doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
sizeof(u32),
GFP_KERNEL);
@@ -168,68 +223,101 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
ret_val = -ENOMEM;
goto add_std_failure;
}
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) {
+ struct nlattr *lvl_loc;
+ struct nlattr *lvl_rem;
+
+ if (nla_validate_nested(nla_a,
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ goto add_std_failure;
+
+ lvl_loc = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSLVLLOC);
+ lvl_rem = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSLVLREM);
+ if (lvl_loc == NULL || lvl_rem == NULL)
+ goto add_std_failure;
+ doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] =
+ nla_get_u32(lvl_rem);
+ doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] =
+ nla_get_u32(lvl_loc);
+ }
- num_cats = netlbl_getinc_u32(&msg, &msg_len);
- doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len);
- if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS)
- goto add_std_failure;
- doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size,
+ if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) {
+ if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ goto add_std_failure;
+
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) {
+ if (nla_validate_nested(nla_a,
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy) != 0)
+ goto add_std_failure;
+ nla_for_each_nested(nla_b, nla_a, nla_b_rem)
+ switch (nla_b->nla_type) {
+ case NLBL_CIPSOV4_A_MLSCATLOC:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->cat.local_size)
+ doi_def->map.std->cat.local_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ case NLBL_CIPSOV4_A_MLSCATREM:
+ if (nla_get_u32(nla_b) >=
+ doi_def->map.std->cat.cipso_size)
+ doi_def->map.std->cat.cipso_size =
+ nla_get_u32(nla_b) + 1;
+ break;
+ }
+ }
+ if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS ||
+ doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
+ goto add_std_failure;
+ doi_def->map.std->cat.local = kcalloc(
+ doi_def->map.std->cat.local_size,
sizeof(u32),
GFP_KERNEL);
- if (doi_def->map.std->cat.local == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
- }
- doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len);
- if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
- goto add_std_failure;
- doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size,
+ if (doi_def->map.std->cat.local == NULL) {
+ ret_val = -ENOMEM;
+ goto add_std_failure;
+ }
+ doi_def->map.std->cat.cipso = kcalloc(
+ doi_def->map.std->cat.cipso_size,
sizeof(u32),
GFP_KERNEL);
- if (doi_def->map.std->cat.cipso == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
- }
-
- if (msg_len <
- num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) +
- num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16))
- goto add_std_failure;
-
- for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++)
- doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL;
- for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++)
- doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL;
- for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++)
- doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT;
- for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++)
- doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT;
-
- for (iter = 0; iter < num_lvls; iter++) {
- tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
- tmp_val_b = netlbl_getinc_u8(&msg, &msg_len);
-
- if (tmp_val_a >= doi_def->map.std->lvl.local_size ||
- tmp_val_b >= doi_def->map.std->lvl.cipso_size)
- goto add_std_failure;
-
- doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a;
- doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b;
- }
-
- for (iter = 0; iter < num_cats; iter++) {
- tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
- tmp_val_b = netlbl_getinc_u16(&msg, &msg_len);
-
- if (tmp_val_a >= doi_def->map.std->cat.local_size ||
- tmp_val_b >= doi_def->map.std->cat.cipso_size)
+ if (doi_def->map.std->cat.cipso == NULL) {
+ ret_val = -ENOMEM;
goto add_std_failure;
-
- doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a;
- doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b;
+ }
+ nla_for_each_nested(nla_a,
+ info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
+ nla_a_rem)
+ if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) {
+ struct nlattr *cat_loc;
+ struct nlattr *cat_rem;
+
+ cat_loc = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSCATLOC);
+ cat_rem = nla_find_nested(nla_a,
+ NLBL_CIPSOV4_A_MLSCATREM);
+ if (cat_loc == NULL || cat_rem == NULL)
+ goto add_std_failure;
+ doi_def->map.std->cat.local[
+ nla_get_u32(cat_loc)] =
+ nla_get_u32(cat_rem);
+ doi_def->map.std->cat.cipso[
+ nla_get_u32(cat_rem)] =
+ nla_get_u32(cat_loc);
+ }
}
- doi_def->doi = doi;
ret_val = cipso_v4_doi_add(doi_def);
if (ret_val != 0)
goto add_std_failure;
@@ -243,9 +331,7 @@ add_std_failure:
/**
* netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
- * @doi: the DOI value
- * @msg: the ADD message data
- * @msg_size: the size of the ADD message buffer
+ * @info: the Generic NETLINK info block
*
* Description:
* Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message
@@ -253,52 +339,31 @@ add_std_failure:
* error.
*
*/
-static int netlbl_cipsov4_add_pass(u32 doi,
- struct nlattr *msg,
- size_t msg_size)
+static int netlbl_cipsov4_add_pass(struct genl_info *info)
{
- int ret_val = -EINVAL;
- int msg_len = msg_size;
- u32 num_tags;
+ int ret_val;
struct cipso_v4_doi *doi_def = NULL;
- u32 iter;
- if (msg_len < NETLBL_LEN_U32)
- goto add_pass_failure;
- num_tags = netlbl_getinc_u32(&msg, &msg_len);
- if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
- goto add_pass_failure;
+ if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
+ !info->attrs[NLBL_CIPSOV4_A_TAGLST])
+ return -EINVAL;
doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
- if (doi_def == NULL) {
- ret_val = -ENOMEM;
- goto add_pass_failure;
- }
+ if (doi_def == NULL)
+ return -ENOMEM;
doi_def->type = CIPSO_V4_MAP_PASS;
- for (iter = 0; iter < num_tags; iter++) {
- if (msg_len < NETLBL_LEN_U8)
- goto add_pass_failure;
- doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
- switch (doi_def->tags[iter]) {
- case CIPSO_V4_TAG_RBITMAP:
- break;
- default:
- goto add_pass_failure;
- }
- }
- if (iter < CIPSO_V4_TAG_MAXCNT)
- doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+ ret_val = netlbl_cipsov4_add_common(info, doi_def);
+ if (ret_val != 0)
+ goto add_pass_failure;
- doi_def->doi = doi;
ret_val = cipso_v4_doi_add(doi_def);
if (ret_val != 0)
goto add_pass_failure;
return 0;
add_pass_failure:
- if (doi_def)
- netlbl_cipsov4_doi_free(&doi_def->rcu);
+ netlbl_cipsov4_doi_free(&doi_def->rcu);
return ret_val;
}
@@ -316,34 +381,21 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
{
int ret_val = -EINVAL;
- u32 doi;
u32 map_type;
- int msg_len = netlbl_netlink_payload_len(skb);
- struct nlattr *msg = netlbl_netlink_payload_data(skb);
-
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
- goto add_return;
- if (msg_len < 2 * NETLBL_LEN_U32)
- goto add_return;
+ if (!info->attrs[NLBL_CIPSOV4_A_MTYPE])
+ return -EINVAL;
- doi = netlbl_getinc_u32(&msg, &msg_len);
- map_type = netlbl_getinc_u32(&msg, &msg_len);
+ map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
switch (map_type) {
case CIPSO_V4_MAP_STD:
- ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len);
+ ret_val = netlbl_cipsov4_add_std(info);
break;
case CIPSO_V4_MAP_PASS:
- ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len);
+ ret_val = netlbl_cipsov4_add_pass(info);
break;
}
-add_return:
- netlbl_netlink_send_ack(info,
- netlbl_cipsov4_gnl_family.id,
- NLBL_CIPSOV4_C_ACK,
- -ret_val);
return ret_val;
}
@@ -353,84 +405,239 @@ add_return:
* @info: the Generic NETLINK info block
*
* Description:
- * Process a user generated LIST message and respond accordingly. Returns
- * zero on success and negative values on error.
+ * Process a user generated LIST message and respond accordingly. While the
+ * response message generated by the kernel is straightforward, determining
+ * beforehand the size of the buffer to allocate is not (we have to generate
+ * the message to know the size). In order to keep this function sane what we
+ * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in
+ * that size, if we fail then we restart with a larger buffer and try again.
+ * We continue in this manner until we hit a limit of failed attempts then we
+ * give up and just send an error message. Returns zero on success and
+ * negative values on error.
*
*/
static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val = -EINVAL;
+ int ret_val;
+ struct sk_buff *ans_skb = NULL;
+ u32 nlsze_mult = 1;
+ void *data;
u32 doi;
- struct nlattr *msg = netlbl_netlink_payload_data(skb);
- struct sk_buff *ans_skb;
+ struct nlattr *nla_a;
+ struct nlattr *nla_b;
+ struct cipso_v4_doi *doi_def;
+ u32 iter;
- if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32)
+ if (!info->attrs[NLBL_CIPSOV4_A_DOI]) {
+ ret_val = -EINVAL;
goto list_failure;
+ }
- doi = nla_get_u32(msg);
- ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN));
+list_start:
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL);
if (ans_skb == NULL) {
ret_val = -ENOMEM;
goto list_failure;
}
- netlbl_netlink_hdr_push(ans_skb,
- info->snd_pid,
- 0,
- netlbl_cipsov4_gnl_family.id,
- NLBL_CIPSOV4_C_LIST);
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_cipsov4_gnl_family.id,
+ 0,
+ NLBL_CIPSOV4_C_LIST);
+ if (data == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure;
+ }
+
+ doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+
+ rcu_read_lock();
+ doi_def = cipso_v4_doi_getdef(doi);
+ if (doi_def == NULL) {
+ ret_val = -EINVAL;
+ goto list_failure_lock;
+ }
+
+ ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
+ if (ret_val != 0)
+ goto list_failure_lock;
+
+ nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST);
+ if (nla_a == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure_lock;
+ }
+ for (iter = 0;
+ iter < CIPSO_V4_TAG_MAXCNT &&
+ doi_def->tags[iter] != CIPSO_V4_TAG_INVALID;
+ iter++) {
+ ret_val = nla_put_u8(ans_skb,
+ NLBL_CIPSOV4_A_TAG,
+ doi_def->tags[iter]);
+ if (ret_val != 0)
+ goto list_failure_lock;
+ }
+ nla_nest_end(ans_skb, nla_a);
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_STD:
+ nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
+ if (nla_a == NULL) {
+ ret_val = -ENOMEM;
+ goto list_failure_lock;
+ }
+ for (iter = 0;
+ iter < doi_def->map.std->lvl.local_size;
+ iter++) {
+ if (doi_def->map.std->lvl.local[iter] ==
+ CIPSO_V4_INV_LVL)
+ continue;
+
+ nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL);
+ if (nla_b == NULL) {
+ ret_val = -ENOMEM;
+ goto list_retry;
+ }
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSLVLLOC,
+ iter);
+ if (ret_val != 0)
+ goto list_retry;
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSLVLREM,
+ doi_def->map.std->lvl.local[iter]);
+ if (ret_val != 0)
+ goto list_retry;
+ nla_nest_end(ans_skb, nla_b);
+ }
+ nla_nest_end(ans_skb, nla_a);
+
+ nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST);
+ if (nla_a == NULL) {
+ ret_val = -ENOMEM;
+ goto list_retry;
+ }
+ for (iter = 0;
+ iter < doi_def->map.std->cat.local_size;
+ iter++) {
+ if (doi_def->map.std->cat.local[iter] ==
+ CIPSO_V4_INV_CAT)
+ continue;
+
+ nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT);
+ if (nla_b == NULL) {
+ ret_val = -ENOMEM;
+ goto list_retry;
+ }
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSCATLOC,
+ iter);
+ if (ret_val != 0)
+ goto list_retry;
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_CIPSOV4_A_MLSCATREM,
+ doi_def->map.std->cat.local[iter]);
+ if (ret_val != 0)
+ goto list_retry;
+ nla_nest_end(ans_skb, nla_b);
+ }
+ nla_nest_end(ans_skb, nla_a);
+
+ break;
+ }
+ rcu_read_unlock();
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
if (ret_val != 0)
goto list_failure;
return 0;
+list_retry:
+ /* XXX - this limit is a guesstimate */
+ if (nlsze_mult < 4) {
+ rcu_read_unlock();
+ kfree_skb(ans_skb);
+ nlsze_mult++;
+ goto list_start;
+ }
+list_failure_lock:
+ rcu_read_unlock();
list_failure:
- netlbl_netlink_send_ack(info,
- netlbl_cipsov4_gnl_family.id,
- NLBL_CIPSOV4_C_ACK,
- -ret_val);
+ kfree_skb(ans_skb);
+ return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL
+ * @doi_def: the CIPSOv4 DOI definition
+ * @arg: the netlbl_cipsov4_doiwalk_arg structure
+ *
+ * Description:
+ * This function is designed to be used as a callback to the
+ * cipso_v4_doi_walk() function for use in generating a response for a LISTALL
+ * message. Returns the size of the message on success, negative values on
+ * failure.
+ *
+ */
+static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
+{
+ int ret_val = -ENOMEM;
+ struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
+ void *data;
+
+ data = netlbl_netlink_hdr_put(cb_arg->skb,
+ NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ cb_arg->seq,
+ netlbl_cipsov4_gnl_family.id,
+ NLM_F_MULTI,
+ NLBL_CIPSOV4_C_LISTALL);
+ if (data == NULL)
+ goto listall_cb_failure;
+
+ ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ ret_val = nla_put_u32(cb_arg->skb,
+ NLBL_CIPSOV4_A_MTYPE,
+ doi_def->type);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+
+ return genlmsg_end(cb_arg->skb, data);
+
+listall_cb_failure:
+ genlmsg_cancel(cb_arg->skb, data);
return ret_val;
}
/**
* netlbl_cipsov4_listall - Handle a LISTALL message
* @skb: the NETLINK buffer
- * @info: the Generic NETLINK info block
+ * @cb: the NETLINK callback
*
* Description:
* Process a user generated LISTALL message and respond accordingly. Returns
* zero on success and negative values on error.
*
*/
-static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info)
+static int netlbl_cipsov4_listall(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
- int ret_val = -EINVAL;
- struct sk_buff *ans_skb;
+ struct netlbl_cipsov4_doiwalk_arg cb_arg;
+ int doi_skip = cb->args[0];
- ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN));
- if (ans_skb == NULL) {
- ret_val = -ENOMEM;
- goto listall_failure;
- }
- netlbl_netlink_hdr_push(ans_skb,
- info->snd_pid,
- 0,
- netlbl_cipsov4_gnl_family.id,
- NLBL_CIPSOV4_C_LISTALL);
+ cb_arg.nl_cb = cb;
+ cb_arg.skb = skb;
+ cb_arg.seq = cb->nlh->nlmsg_seq;
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
- if (ret_val != 0)
- goto listall_failure;
-
- return 0;
+ cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg);
-listall_failure:
- netlbl_netlink_send_ack(info,
- netlbl_cipsov4_gnl_family.id,
- NLBL_CIPSOV4_C_ACK,
- -ret_val);
- return ret_val;
+ cb->args[0] = doi_skip;
+ return skb->len;
}
/**
@@ -445,27 +652,14 @@ listall_failure:
*/
static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val;
+ int ret_val = -EINVAL;
u32 doi;
- struct nlattr *msg = netlbl_netlink_payload_data(skb);
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
- goto remove_return;
-
- if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) {
- ret_val = -EINVAL;
- goto remove_return;
+ if (info->attrs[NLBL_CIPSOV4_A_DOI]) {
+ doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+ ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
}
- doi = nla_get_u32(msg);
- ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
-
-remove_return:
- netlbl_netlink_send_ack(info,
- netlbl_cipsov4_gnl_family.id,
- NLBL_CIPSOV4_C_ACK,
- -ret_val);
return ret_val;
}
@@ -475,14 +669,16 @@ remove_return:
static struct genl_ops netlbl_cipsov4_genl_c_add = {
.cmd = NLBL_CIPSOV4_C_ADD,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_cipsov4_genl_policy,
.doit = netlbl_cipsov4_add,
.dumpit = NULL,
};
static struct genl_ops netlbl_cipsov4_genl_c_remove = {
.cmd = NLBL_CIPSOV4_C_REMOVE,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_cipsov4_genl_policy,
.doit = netlbl_cipsov4_remove,
.dumpit = NULL,
};
@@ -490,6 +686,7 @@ static struct genl_ops netlbl_cipsov4_genl_c_remove = {
static struct genl_ops netlbl_cipsov4_genl_c_list = {
.cmd = NLBL_CIPSOV4_C_LIST,
.flags = 0,
+ .policy = netlbl_cipsov4_genl_policy,
.doit = netlbl_cipsov4_list,
.dumpit = NULL,
};
@@ -497,8 +694,9 @@ static struct genl_ops netlbl_cipsov4_genl_c_list = {
static struct genl_ops netlbl_cipsov4_genl_c_listall = {
.cmd = NLBL_CIPSOV4_C_LISTALL,
.flags = 0,
- .doit = netlbl_cipsov4_listall,
- .dumpit = NULL,
+ .policy = netlbl_cipsov4_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_cipsov4_listall,
};
/*
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 4c6ff4b93004..f03cf9b78286 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -34,175 +34,71 @@
#include <net/netlabel.h>
/*
- * The following NetLabel payloads are supported by the CIPSO subsystem, all
- * of which are preceeded by the nlmsghdr struct.
+ * The following NetLabel payloads are supported by the CIPSO subsystem.
*
- * o ACK:
- * Sent by the kernel in response to an applications message, applications
- * should never send this message.
+ * o ADD:
+ * Sent by an application to add a new DOI mapping table.
*
- * +----------------------+-----------------------+
- * | seq number (32 bits) | return code (32 bits) |
- * +----------------------+-----------------------+
+ * Required attributes:
*
- * seq number: the sequence number of the original message, taken from the
- * nlmsghdr structure
- * return code: return value, based on errno values
+ * NLBL_CIPSOV4_A_DOI
+ * NLBL_CIPSOV4_A_MTYPE
+ * NLBL_CIPSOV4_A_TAGLST
*
- * o ADD:
- * Sent by an application to add a new DOI mapping table, after completion
- * of the task the kernel should ACK this message.
- *
- * +---------------+--------------------+---------------------+
- * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ...
- * +---------------+--------------------+---------------------+
- *
- * +-----------------+
- * | tag #X (8 bits) | ... repeated
- * +-----------------+
- *
- * +-------------- ---- --- -- -
- * | mapping data
- * +-------------- ---- --- -- -
- *
- * DOI: the DOI value
- * map type: the mapping table type (defined in the cipso_ipv4.h header
- * as CIPSO_V4_MAP_*)
- * tag count: the number of tags, must be greater than zero
- * tag: the CIPSO tag for the DOI, tags listed first are given
- * higher priorirty when sending packets
- * mapping data: specific to the map type (see below)
- *
- * CIPSO_V4_MAP_STD
- *
- * +------------------+-----------------------+----------------------+
- * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ...
- * +------------------+-----------------------+----------------------+
- *
- * +----------------------+---------------------+---------------------+
- * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ...
- * +----------------------+---------------------+---------------------+
- *
- * +--------------------------+-------------------------+
- * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
- * +--------------------------+-------------------------+
- *
- * +-----------------------------+-----------------------------+
- * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
- * +-----------------------------+-----------------------------+
- *
- * levels: the number of level mappings
- * max l level: the highest local level
- * max r level: the highest remote/CIPSO level
- * categories: the number of category mappings
- * max l cat: the highest local category
- * max r cat: the highest remote/CIPSO category
- * local level: the local part of a level mapping
- * CIPSO level: the remote/CIPSO part of a level mapping
- * local category: the local part of a category mapping
- * CIPSO category: the remote/CIPSO part of a category mapping
- *
- * CIPSO_V4_MAP_PASS
- *
- * No mapping data is needed for this map type.
+ * If using CIPSO_V4_MAP_STD the following attributes are required:
+ *
+ * NLBL_CIPSOV4_A_MLSLVLLST
+ * NLBL_CIPSOV4_A_MLSCATLST
+ *
+ * If using CIPSO_V4_MAP_PASS no additional attributes are required.
*
* o REMOVE:
* Sent by an application to remove a specific DOI mapping table from the
- * CIPSO V4 system. The kernel should ACK this message.
+ * CIPSO V4 system.
*
- * +---------------+
- * | DOI (32 bits) |
- * +---------------+
+ * Required attributes:
*
- * DOI: the DOI value
+ * NLBL_CIPSOV4_A_DOI
*
* o LIST:
- * Sent by an application to list the details of a DOI definition. The
- * kernel should send an ACK on error or a response as indicated below. The
- * application generated message format is shown below.
+ * Sent by an application to list the details of a DOI definition. On
+ * success the kernel should send a response using the following format.
*
- * +---------------+
- * | DOI (32 bits) |
- * +---------------+
+ * Required attributes:
*
- * DOI: the DOI value
+ * NLBL_CIPSOV4_A_DOI
*
* The valid response message format depends on the type of the DOI mapping,
- * the known formats are shown below.
- *
- * +--------------------+
- * | map type (32 bits) | ...
- * +--------------------+
- *
- * map type: the DOI mapping table type (defined in the cipso_ipv4.h
- * header as CIPSO_V4_MAP_*)
- *
- * (map type == CIPSO_V4_MAP_STD)
- *
- * +----------------+------------------+----------------------+
- * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ...
- * +----------------+------------------+----------------------+
+ * the defined formats are shown below.
*
- * +-----------------+
- * | tag #X (8 bits) | ... repeated
- * +-----------------+
+ * Required attributes:
*
- * +--------------------------+-------------------------+
- * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
- * +--------------------------+-------------------------+
+ * NLBL_CIPSOV4_A_MTYPE
+ * NLBL_CIPSOV4_A_TAGLST
*
- * +-----------------------------+-----------------------------+
- * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
- * +-----------------------------+-----------------------------+
+ * If using CIPSO_V4_MAP_STD the following attributes are required:
*
- * tags: the number of CIPSO tag types
- * levels: the number of level mappings
- * categories: the number of category mappings
- * tag: the tag number, tags listed first are given higher
- * priority when sending packets
- * local level: the local part of a level mapping
- * CIPSO level: the remote/CIPSO part of a level mapping
- * local category: the local part of a category mapping
- * CIPSO category: the remote/CIPSO part of a category mapping
+ * NLBL_CIPSOV4_A_MLSLVLLST
+ * NLBL_CIPSOV4_A_MLSCATLST
*
- * (map type == CIPSO_V4_MAP_PASS)
- *
- * +----------------+
- * | tags (32 bits) | ...
- * +----------------+
- *
- * +-----------------+
- * | tag #X (8 bits) | ... repeated
- * +-----------------+
- *
- * tags: the number of CIPSO tag types
- * tag: the tag number, tags listed first are given higher
- * priority when sending packets
+ * If using CIPSO_V4_MAP_PASS no additional attributes are required.
*
* o LISTALL:
* This message is sent by an application to list the valid DOIs on the
- * system. There is no payload and the kernel should respond with an ACK
- * or the following message.
- *
- * +---------------------+------------------+-----------------------+
- * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) |
- * +---------------------+------------------+-----------------------+
+ * system. When sent by an application there is no payload and the
+ * NLM_F_DUMP flag should be set. The kernel should respond with a series of
+ * the following messages.
*
- * +-----------------------+
- * | map type #X (32 bits) | ...
- * +-----------------------+
+ * Required attributes:
*
- * DOI count: the number of DOIs
- * DOI: the DOI value
- * map type: the DOI mapping table type (defined in the cipso_ipv4.h
- * header as CIPSO_V4_MAP_*)
+ * NLBL_CIPSOV4_A_DOI
+ * NLBL_CIPSOV4_A_MTYPE
*
*/
/* NetLabel CIPSOv4 commands */
enum {
NLBL_CIPSOV4_C_UNSPEC,
- NLBL_CIPSOV4_C_ACK,
NLBL_CIPSOV4_C_ADD,
NLBL_CIPSOV4_C_REMOVE,
NLBL_CIPSOV4_C_LIST,
@@ -211,6 +107,59 @@ enum {
};
#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
+/* NetLabel CIPSOv4 attributes */
+enum {
+ NLBL_CIPSOV4_A_UNSPEC,
+ NLBL_CIPSOV4_A_DOI,
+ /* (NLA_U32)
+ * the DOI value */
+ NLBL_CIPSOV4_A_MTYPE,
+ /* (NLA_U32)
+ * the mapping table type (defined in the cipso_ipv4.h header as
+ * CIPSO_V4_MAP_*) */
+ NLBL_CIPSOV4_A_TAG,
+ /* (NLA_U8)
+ * a CIPSO tag type, meant to be used within a NLBL_CIPSOV4_A_TAGLST
+ * attribute */
+ NLBL_CIPSOV4_A_TAGLST,
+ /* (NLA_NESTED)
+ * the CIPSO tag list for the DOI, there must be at least one
+ * NLBL_CIPSOV4_A_TAG attribute, tags listed first are given higher
+ * priority when sending packets */
+ NLBL_CIPSOV4_A_MLSLVLLOC,
+ /* (NLA_U32)
+ * the local MLS sensitivity level */
+ NLBL_CIPSOV4_A_MLSLVLREM,
+ /* (NLA_U32)
+ * the remote MLS sensitivity level */
+ NLBL_CIPSOV4_A_MLSLVL,
+ /* (NLA_NESTED)
+ * a MLS sensitivity level mapping, must contain only one attribute of
+ * each of the following types: NLBL_CIPSOV4_A_MLSLVLLOC and
+ * NLBL_CIPSOV4_A_MLSLVLREM */
+ NLBL_CIPSOV4_A_MLSLVLLST,
+ /* (NLA_NESTED)
+ * the CIPSO level mappings, there must be at least one
+ * NLBL_CIPSOV4_A_MLSLVL attribute */
+ NLBL_CIPSOV4_A_MLSCATLOC,
+ /* (NLA_U32)
+ * the local MLS category */
+ NLBL_CIPSOV4_A_MLSCATREM,
+ /* (NLA_U32)
+ * the remote MLS category */
+ NLBL_CIPSOV4_A_MLSCAT,
+ /* (NLA_NESTED)
+ * a MLS category mapping, must contain only one attribute of each of
+ * the following types: NLBL_CIPSOV4_A_MLSCATLOC and
+ * NLBL_CIPSOV4_A_MLSCATREM */
+ NLBL_CIPSOV4_A_MLSCATLST,
+ /* (NLA_NESTED)
+ * the CIPSO category mappings, there must be at least one
+ * NLBL_CIPSOV4_A_MLSCAT attribute */
+ __NLBL_CIPSOV4_A_MAX,
+};
+#define NLBL_CIPSOV4_A_MAX (__NLBL_CIPSOV4_A_MAX - 1)
+
/* NetLabel protocol functions */
int netlbl_cipsov4_genl_init(void);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 0489a1378101..f56d7a8ac7b7 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -354,160 +354,51 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
}
/**
- * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff
+ * netlbl_domhsh_walk - Iterate through the domain mapping hash table
+ * @skip_bkt: the number of buckets to skip at the start
+ * @skip_chain: the number of entries to skip in the first iterated bucket
+ * @callback: callback for each entry
+ * @cb_arg: argument for the callback function
*
* Description:
- * Dump the domain hash table into a buffer suitable for returning to an
- * application in response to a NetLabel management DOMAIN message. This
- * function may fail if another process is growing the hash table at the same
- * time. The returned sk_buff has room at the front of the sk_buff for
- * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a
- * pointer to a sk_buff on success, NULL on error.
+ * Iterate over the domain mapping hash table, skipping the first @skip_bkt
+ * buckets and @skip_chain entries. For each entry in the table call
+ * @callback, if @callback returns a negative value stop 'walking' through the
+ * table and return. Updates the values in @skip_bkt and @skip_chain on
+ * return. Returns zero on success, negative values on failure.
*
*/
-struct sk_buff *netlbl_domhsh_dump(size_t headroom)
+int netlbl_domhsh_walk(u32 *skip_bkt,
+ u32 *skip_chain,
+ int (*callback) (struct netlbl_dom_map *entry, void *arg),
+ void *cb_arg)
{
- struct sk_buff *skb = NULL;
- ssize_t buf_len;
- u32 bkt_iter;
- u32 dom_cnt = 0;
- struct netlbl_domhsh_tbl *hsh_tbl;
- struct netlbl_dom_map *list_iter;
- ssize_t tmp_len;
+ int ret_val = -ENOENT;
+ u32 iter_bkt;
+ struct netlbl_dom_map *iter_entry;
+ u32 chain_cnt = 0;
- buf_len = NETLBL_LEN_U32;
rcu_read_lock();
- hsh_tbl = rcu_dereference(netlbl_domhsh);
- for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
- list_for_each_entry_rcu(list_iter,
- &hsh_tbl->tbl[bkt_iter], list) {
- buf_len += NETLBL_LEN_U32 +
- nla_total_size(strlen(list_iter->domain) + 1);
- switch (list_iter->type) {
- case NETLBL_NLTYPE_UNLABELED:
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- buf_len += 2 * NETLBL_LEN_U32;
- break;
- }
- dom_cnt++;
- }
-
- skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
- if (skb == NULL)
- goto dump_failure;
-
- if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
- goto dump_failure;
- buf_len -= NETLBL_LEN_U32;
- hsh_tbl = rcu_dereference(netlbl_domhsh);
- for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
- list_for_each_entry_rcu(list_iter,
- &hsh_tbl->tbl[bkt_iter], list) {
- tmp_len = nla_total_size(strlen(list_iter->domain) +
- 1);
- if (buf_len < NETLBL_LEN_U32 + tmp_len)
- goto dump_failure;
- if (nla_put_string(skb,
- NLA_STRING,
- list_iter->domain) != 0)
- goto dump_failure;
- if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
- goto dump_failure;
- buf_len -= NETLBL_LEN_U32 + tmp_len;
- switch (list_iter->type) {
- case NETLBL_NLTYPE_UNLABELED:
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- if (buf_len < 2 * NETLBL_LEN_U32)
- goto dump_failure;
- if (nla_put_u32(skb,
- NLA_U32,
- list_iter->type_def.cipsov4->type) != 0)
- goto dump_failure;
- if (nla_put_u32(skb,
- NLA_U32,
- list_iter->type_def.cipsov4->doi) != 0)
- goto dump_failure;
- buf_len -= 2 * NETLBL_LEN_U32;
- break;
+ for (iter_bkt = *skip_bkt;
+ iter_bkt < rcu_dereference(netlbl_domhsh)->size;
+ iter_bkt++, chain_cnt = 0) {
+ list_for_each_entry_rcu(iter_entry,
+ &netlbl_domhsh->tbl[iter_bkt],
+ list)
+ if (iter_entry->valid) {
+ if (chain_cnt++ < *skip_chain)
+ continue;
+ ret_val = callback(iter_entry, cb_arg);
+ if (ret_val < 0) {
+ chain_cnt--;
+ goto walk_return;
+ }
}
- }
- rcu_read_unlock();
-
- return skb;
-
-dump_failure:
- rcu_read_unlock();
- kfree_skb(skb);
- return NULL;
-}
-
-/**
- * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
- *
- * Description:
- * Dump the default domain mapping into a buffer suitable for returning to an
- * application in response to a NetLabel management DEFDOMAIN message. This
- * function may fail if another process is changing the default domain mapping
- * at the same time. The returned sk_buff has room at the front of the
- * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message
- * format. Returns a pointer to a sk_buff on success, NULL on error.
- *
- */
-struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
-{
- struct sk_buff *skb;
- ssize_t buf_len;
- struct netlbl_dom_map *entry;
-
- buf_len = NETLBL_LEN_U32;
- rcu_read_lock();
- entry = rcu_dereference(netlbl_domhsh_def);
- if (entry != NULL)
- switch (entry->type) {
- case NETLBL_NLTYPE_UNLABELED:
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- buf_len += 2 * NETLBL_LEN_U32;
- break;
- }
-
- skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
- if (skb == NULL)
- goto dump_default_failure;
-
- if (entry != rcu_dereference(netlbl_domhsh_def))
- goto dump_default_failure;
- if (entry != NULL) {
- if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
- goto dump_default_failure;
- buf_len -= NETLBL_LEN_U32;
- switch (entry->type) {
- case NETLBL_NLTYPE_UNLABELED:
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- if (buf_len < 2 * NETLBL_LEN_U32)
- goto dump_default_failure;
- if (nla_put_u32(skb,
- NLA_U32,
- entry->type_def.cipsov4->type) != 0)
- goto dump_default_failure;
- if (nla_put_u32(skb,
- NLA_U32,
- entry->type_def.cipsov4->doi) != 0)
- goto dump_default_failure;
- buf_len -= 2 * NETLBL_LEN_U32;
- break;
- }
- } else
- nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE);
- rcu_read_unlock();
-
- return skb;
+ }
-dump_default_failure:
+walk_return:
rcu_read_unlock();
- kfree_skb(skb);
- return NULL;
+ *skip_bkt = iter_bkt;
+ *skip_chain = chain_cnt;
+ return ret_val;
}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 99a2287de246..02af72a7877c 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -61,7 +61,9 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry);
int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
int netlbl_domhsh_remove_default(void);
struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
-struct sk_buff *netlbl_domhsh_dump(size_t headroom);
-struct sk_buff *netlbl_domhsh_dump_default(size_t headroom);
+int netlbl_domhsh_walk(u32 *skip_bkt,
+ u32 *skip_chain,
+ int (*callback) (struct netlbl_dom_map *entry, void *arg),
+ void *cb_arg);
#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0fd8aaafe23f..54fb7de3c2b1 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -85,6 +85,29 @@ socket_setattr_return:
}
/**
+ * netlbl_sock_getattr - Determine the security attributes of a sock
+ * @sk: the sock
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given sock to see if any NetLabel style labeling has been
+ * applied to the sock, if so it parses the socket label and returns the
+ * security attributes in @secattr. Returns zero on success, negative values
+ * on failure.
+ *
+ */
+int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+
+ ret_val = cipso_v4_sock_getattr(sk, secattr);
+ if (ret_val == 0)
+ return 0;
+
+ return netlbl_unlabel_getattr(secattr);
+}
+
+/**
* netlbl_socket_getattr - Determine the security attributes of a socket
* @sock: the socket
* @secattr: the security attributes
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 85bc11a1fc46..8626c9f678eb 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -42,15 +42,29 @@
#include "netlabel_user.h"
#include "netlabel_mgmt.h"
+/* Argument struct for netlbl_domhsh_walk() */
+struct netlbl_domhsh_walk_arg {
+ struct netlink_callback *nl_cb;
+ struct sk_buff *skb;
+ u32 seq;
+};
+
/* NetLabel Generic NETLINK CIPSOv4 family */
static struct genl_family netlbl_mgmt_gnl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = NETLBL_NLTYPE_MGMT_NAME,
.version = NETLBL_PROTO_VERSION,
- .maxattr = 0,
+ .maxattr = NLBL_MGMT_A_MAX,
};
+/* NetLabel Netlink attribute policy */
+static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
+ [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING },
+ [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 },
+ [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 },
+ [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 },
+};
/*
* NetLabel Command Handlers
@@ -70,97 +84,62 @@ static struct genl_family netlbl_mgmt_gnl_family = {
static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
{
int ret_val = -EINVAL;
- struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
- int msg_len = netlbl_netlink_payload_len(skb);
- u32 count;
struct netlbl_dom_map *entry = NULL;
- u32 iter;
+ size_t tmp_size;
u32 tmp_val;
- int tmp_size;
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
+ if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
+ !info->attrs[NLBL_MGMT_A_PROTOCOL])
goto add_failure;
- if (msg_len < NETLBL_LEN_U32)
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ ret_val = -ENOMEM;
+ goto add_failure;
+ }
+ tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
+ entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+ if (entry->domain == NULL) {
+ ret_val = -ENOMEM;
goto add_failure;
- count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+ }
+ entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
+ nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
- for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) {
- if (msg_len <= 0) {
- ret_val = -EINVAL;
- goto add_failure;
- }
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (entry == NULL) {
- ret_val = -ENOMEM;
- goto add_failure;
- }
- tmp_size = nla_len(msg_ptr);
- if (tmp_size <= 0 || tmp_size > msg_len) {
- ret_val = -EINVAL;
- goto add_failure;
- }
- entry->domain = kmalloc(tmp_size, GFP_KERNEL);
- if (entry->domain == NULL) {
- ret_val = -ENOMEM;
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ ret_val = netlbl_domhsh_add(entry);
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ if (!info->attrs[NLBL_MGMT_A_CV4DOI])
goto add_failure;
- }
- nla_strlcpy(entry->domain, msg_ptr, tmp_size);
- entry->domain[tmp_size - 1] = '\0';
- msg_ptr = nla_next(msg_ptr, &msg_len);
- if (msg_len < NETLBL_LEN_U32) {
- ret_val = -EINVAL;
- goto add_failure;
- }
- tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
- entry->type = tmp_val;
- switch (tmp_val) {
- case NETLBL_NLTYPE_UNLABELED:
- ret_val = netlbl_domhsh_add(entry);
- break;
- case NETLBL_NLTYPE_CIPSOV4:
- if (msg_len < NETLBL_LEN_U32) {
- ret_val = -EINVAL;
- goto add_failure;
- }
- tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
- /* We should be holding a rcu_read_lock() here
- * while we hold the result but since the entry
- * will always be deleted when the CIPSO DOI
- * is deleted we aren't going to keep the lock. */
- rcu_read_lock();
- entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
- if (entry->type_def.cipsov4 == NULL) {
- rcu_read_unlock();
- ret_val = -EINVAL;
- goto add_failure;
- }
- ret_val = netlbl_domhsh_add(entry);
+ tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
+ /* We should be holding a rcu_read_lock() here while we hold
+ * the result but since the entry will always be deleted when
+ * the CIPSO DOI is deleted we aren't going to keep the
+ * lock. */
+ rcu_read_lock();
+ entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+ if (entry->type_def.cipsov4 == NULL) {
rcu_read_unlock();
- break;
- default:
- ret_val = -EINVAL;
- }
- if (ret_val != 0)
goto add_failure;
+ }
+ ret_val = netlbl_domhsh_add(entry);
+ rcu_read_unlock();
+ break;
+ default:
+ goto add_failure;
}
+ if (ret_val != 0)
+ goto add_failure;
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- NETLBL_E_OK);
return 0;
add_failure:
if (entry)
kfree(entry->domain);
kfree(entry);
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
return ret_val;
}
@@ -176,87 +155,98 @@ add_failure:
*/
static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val = -EINVAL;
- struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
- int msg_len = netlbl_netlink_payload_len(skb);
- u32 count;
- u32 iter;
- int tmp_size;
- unsigned char *domain;
-
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
- goto remove_return;
+ char *domain;
- if (msg_len < NETLBL_LEN_U32)
- goto remove_return;
- count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+ if (!info->attrs[NLBL_MGMT_A_DOMAIN])
+ return -EINVAL;
- for (iter = 0; iter < count && msg_len > 0; iter++) {
- if (msg_len <= 0) {
- ret_val = -EINVAL;
- goto remove_return;
- }
- tmp_size = nla_len(msg_ptr);
- domain = nla_data(msg_ptr);
- if (tmp_size <= 0 || tmp_size > msg_len ||
- domain[tmp_size - 1] != '\0') {
- ret_val = -EINVAL;
- goto remove_return;
- }
- ret_val = netlbl_domhsh_remove(domain);
+ domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
+ return netlbl_domhsh_remove(domain);
+}
+
+/**
+ * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL
+ * @entry: the domain mapping hash table entry
+ * @arg: the netlbl_domhsh_walk_arg structure
+ *
+ * Description:
+ * This function is designed to be used as a callback to the
+ * netlbl_domhsh_walk() function for use in generating a response for a LISTALL
+ * message. Returns the size of the message on success, negative values on
+ * failure.
+ *
+ */
+static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
+{
+ int ret_val = -ENOMEM;
+ struct netlbl_domhsh_walk_arg *cb_arg = arg;
+ void *data;
+
+ data = netlbl_netlink_hdr_put(cb_arg->skb,
+ NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ cb_arg->seq,
+ netlbl_mgmt_gnl_family.id,
+ NLM_F_MULTI,
+ NLBL_MGMT_C_LISTALL);
+ if (data == NULL)
+ goto listall_cb_failure;
+
+ ret_val = nla_put_string(cb_arg->skb,
+ NLBL_MGMT_A_DOMAIN,
+ entry->domain);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+ if (ret_val != 0)
+ goto listall_cb_failure;
+ switch (entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = nla_put_u32(cb_arg->skb,
+ NLBL_MGMT_A_CV4DOI,
+ entry->type_def.cipsov4->doi);
if (ret_val != 0)
- goto remove_return;
- msg_ptr = nla_next(msg_ptr, &msg_len);
+ goto listall_cb_failure;
+ break;
}
- ret_val = 0;
+ cb_arg->seq++;
+ return genlmsg_end(cb_arg->skb, data);
-remove_return:
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
+listall_cb_failure:
+ genlmsg_cancel(cb_arg->skb, data);
return ret_val;
}
/**
- * netlbl_mgmt_list - Handle a LIST message
+ * netlbl_mgmt_listall - Handle a LISTALL message
* @skb: the NETLINK buffer
- * @info: the Generic NETLINK info block
+ * @cb: the NETLINK callback
*
* Description:
- * Process a user generated LIST message and dumps the domain hash table in a
- * form suitable for use in a kernel generated LIST message. Returns zero on
- * success, negative values on failure.
+ * Process a user generated LISTALL message and dumps the domain hash table in
+ * a form suitable for use in a kernel generated LISTALL message. Returns zero
+ * on success, negative values on failure.
*
*/
-static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info)
+static int netlbl_mgmt_listall(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
- int ret_val = -ENOMEM;
- struct sk_buff *ans_skb;
-
- ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN));
- if (ans_skb == NULL)
- goto list_failure;
- netlbl_netlink_hdr_push(ans_skb,
- info->snd_pid,
- 0,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_LIST);
-
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
- if (ret_val != 0)
- goto list_failure;
-
- return 0;
-
-list_failure:
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
- return ret_val;
+ struct netlbl_domhsh_walk_arg cb_arg;
+ u32 skip_bkt = cb->args[0];
+ u32 skip_chain = cb->args[1];
+
+ cb_arg.nl_cb = cb;
+ cb_arg.skb = skb;
+ cb_arg.seq = cb->nlh->nlmsg_seq;
+
+ netlbl_domhsh_walk(&skip_bkt,
+ &skip_chain,
+ netlbl_mgmt_listall_cb,
+ &cb_arg);
+
+ cb->args[0] = skip_bkt;
+ cb->args[1] = skip_chain;
+ return skb->len;
}
/**
@@ -272,68 +262,51 @@ list_failure:
static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
{
int ret_val = -EINVAL;
- struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
- int msg_len = netlbl_netlink_payload_len(skb);
struct netlbl_dom_map *entry = NULL;
u32 tmp_val;
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
- goto adddef_failure;
-
- if (msg_len < NETLBL_LEN_U32)
+ if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
goto adddef_failure;
- tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (entry == NULL) {
ret_val = -ENOMEM;
goto adddef_failure;
}
+ entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
- entry->type = tmp_val;
switch (entry->type) {
case NETLBL_NLTYPE_UNLABELED:
ret_val = netlbl_domhsh_add_default(entry);
break;
case NETLBL_NLTYPE_CIPSOV4:
- if (msg_len < NETLBL_LEN_U32) {
- ret_val = -EINVAL;
+ if (!info->attrs[NLBL_MGMT_A_CV4DOI])
goto adddef_failure;
- }
- tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
- /* We should be holding a rcu_read_lock here while we
- * hold the result but since the entry will always be
- * deleted when the CIPSO DOI is deleted we are going
- * to skip the lock. */
+
+ tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
+ /* We should be holding a rcu_read_lock() here while we hold
+ * the result but since the entry will always be deleted when
+ * the CIPSO DOI is deleted we aren't going to keep the
+ * lock. */
rcu_read_lock();
entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
if (entry->type_def.cipsov4 == NULL) {
rcu_read_unlock();
- ret_val = -EINVAL;
goto adddef_failure;
}
ret_val = netlbl_domhsh_add_default(entry);
rcu_read_unlock();
break;
default:
- ret_val = -EINVAL;
+ goto adddef_failure;
}
if (ret_val != 0)
goto adddef_failure;
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- NETLBL_E_OK);
return 0;
adddef_failure:
kfree(entry);
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
return ret_val;
}
@@ -349,20 +322,7 @@ adddef_failure:
*/
static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val;
-
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
- goto removedef_return;
-
- ret_val = netlbl_domhsh_remove_default();
-
-removedef_return:
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
- return ret_val;
+ return netlbl_domhsh_remove_default();
}
/**
@@ -379,88 +339,131 @@ removedef_return:
static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
{
int ret_val = -ENOMEM;
- struct sk_buff *ans_skb;
+ struct sk_buff *ans_skb = NULL;
+ void *data;
+ struct netlbl_dom_map *entry;
- ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN));
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (ans_skb == NULL)
+ return -ENOMEM;
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_mgmt_gnl_family.id,
+ 0,
+ NLBL_MGMT_C_LISTDEF);
+ if (data == NULL)
goto listdef_failure;
- netlbl_netlink_hdr_push(ans_skb,
- info->snd_pid,
- 0,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_LISTDEF);
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+ rcu_read_lock();
+ entry = netlbl_domhsh_getentry(NULL);
+ if (entry == NULL) {
+ ret_val = -ENOENT;
+ goto listdef_failure_lock;
+ }
+ ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type);
if (ret_val != 0)
- goto listdef_failure;
+ goto listdef_failure_lock;
+ switch (entry->type) {
+ case NETLBL_NLTYPE_CIPSOV4:
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_MGMT_A_CV4DOI,
+ entry->type_def.cipsov4->doi);
+ if (ret_val != 0)
+ goto listdef_failure_lock;
+ break;
+ }
+ rcu_read_unlock();
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ if (ret_val != 0)
+ goto listdef_failure;
return 0;
+listdef_failure_lock:
+ rcu_read_unlock();
listdef_failure:
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
+ kfree_skb(ans_skb);
return ret_val;
}
/**
- * netlbl_mgmt_modules - Handle a MODULES message
- * @skb: the NETLINK buffer
- * @info: the Generic NETLINK info block
+ * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response
+ * @skb: the skb to write to
+ * @seq: the NETLINK sequence number
+ * @cb: the NETLINK callback
+ * @protocol: the NetLabel protocol to use in the message
*
* Description:
- * Process a user generated MODULES message and respond accordingly.
+ * This function is to be used in conjunction with netlbl_mgmt_protocols() to
+ * answer an application's PROTOCOLS message. Returns the size of the message
+ * on success, negative values on failure.
*
*/
-static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info)
+static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ u32 protocol)
{
int ret_val = -ENOMEM;
- size_t data_size;
- u32 mod_count;
- struct sk_buff *ans_skb = NULL;
-
- /* unlabeled + cipsov4 */
- mod_count = 2;
-
- data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32;
- ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
- if (ans_skb == NULL)
- goto modules_failure;
-
- if (netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- 0,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_MODULES) == NULL)
- goto modules_failure;
-
- ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
- if (ret_val != 0)
- goto modules_failure;
- ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
+ void *data;
+
+ data = netlbl_netlink_hdr_put(skb,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ netlbl_mgmt_gnl_family.id,
+ NLM_F_MULTI,
+ NLBL_MGMT_C_PROTOCOLS);
+ if (data == NULL)
+ goto protocols_cb_failure;
+
+ ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol);
if (ret_val != 0)
- goto modules_failure;
- ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
- if (ret_val != 0)
- goto modules_failure;
-
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
- if (ret_val != 0)
- goto modules_failure;
+ goto protocols_cb_failure;
- return 0;
+ return genlmsg_end(skb, data);
-modules_failure:
- kfree_skb(ans_skb);
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
+protocols_cb_failure:
+ genlmsg_cancel(skb, data);
return ret_val;
}
/**
+ * netlbl_mgmt_protocols - Handle a PROTOCOLS message
+ * @skb: the NETLINK buffer
+ * @cb: the NETLINK callback
+ *
+ * Description:
+ * Process a user generated PROTOCOLS message and respond accordingly.
+ *
+ */
+static int netlbl_mgmt_protocols(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ u32 protos_sent = cb->args[0];
+
+ if (protos_sent == 0) {
+ if (netlbl_mgmt_protocols_cb(skb,
+ cb,
+ NETLBL_NLTYPE_UNLABELED) < 0)
+ goto protocols_return;
+ protos_sent++;
+ }
+ if (protos_sent == 1) {
+ if (netlbl_mgmt_protocols_cb(skb,
+ cb,
+ NETLBL_NLTYPE_CIPSOV4) < 0)
+ goto protocols_return;
+ protos_sent++;
+ }
+
+protocols_return:
+ cb->args[0] = protos_sent;
+ return skb->len;
+}
+
+/**
* netlbl_mgmt_version - Handle a VERSION message
* @skb: the NETLINK buffer
* @info: the Generic NETLINK info block
@@ -474,35 +477,35 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
{
int ret_val = -ENOMEM;
struct sk_buff *ans_skb = NULL;
+ void *data;
- ans_skb = netlbl_netlink_alloc_skb(0,
- GENL_HDRLEN + NETLBL_LEN_U32,
- GFP_KERNEL);
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (ans_skb == NULL)
- goto version_failure;
- if (netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- 0,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_VERSION) == NULL)
+ return -ENOMEM;
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_mgmt_gnl_family.id,
+ 0,
+ NLBL_MGMT_C_VERSION);
+ if (data == NULL)
goto version_failure;
- ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION);
+ ret_val = nla_put_u32(ans_skb,
+ NLBL_MGMT_A_VERSION,
+ NETLBL_PROTO_VERSION);
if (ret_val != 0)
goto version_failure;
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
if (ret_val != 0)
goto version_failure;
-
return 0;
version_failure:
kfree_skb(ans_skb);
- netlbl_netlink_send_ack(info,
- netlbl_mgmt_gnl_family.id,
- NLBL_MGMT_C_ACK,
- -ret_val);
return ret_val;
}
@@ -513,35 +516,40 @@ version_failure:
static struct genl_ops netlbl_mgmt_genl_c_add = {
.cmd = NLBL_MGMT_C_ADD,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
.doit = netlbl_mgmt_add,
.dumpit = NULL,
};
static struct genl_ops netlbl_mgmt_genl_c_remove = {
.cmd = NLBL_MGMT_C_REMOVE,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
.doit = netlbl_mgmt_remove,
.dumpit = NULL,
};
-static struct genl_ops netlbl_mgmt_genl_c_list = {
- .cmd = NLBL_MGMT_C_LIST,
+static struct genl_ops netlbl_mgmt_genl_c_listall = {
+ .cmd = NLBL_MGMT_C_LISTALL,
.flags = 0,
- .doit = netlbl_mgmt_list,
- .dumpit = NULL,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_mgmt_listall,
};
static struct genl_ops netlbl_mgmt_genl_c_adddef = {
.cmd = NLBL_MGMT_C_ADDDEF,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
.doit = netlbl_mgmt_adddef,
.dumpit = NULL,
};
static struct genl_ops netlbl_mgmt_genl_c_removedef = {
.cmd = NLBL_MGMT_C_REMOVEDEF,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_mgmt_genl_policy,
.doit = netlbl_mgmt_removedef,
.dumpit = NULL,
};
@@ -549,20 +557,23 @@ static struct genl_ops netlbl_mgmt_genl_c_removedef = {
static struct genl_ops netlbl_mgmt_genl_c_listdef = {
.cmd = NLBL_MGMT_C_LISTDEF,
.flags = 0,
+ .policy = netlbl_mgmt_genl_policy,
.doit = netlbl_mgmt_listdef,
.dumpit = NULL,
};
-static struct genl_ops netlbl_mgmt_genl_c_modules = {
- .cmd = NLBL_MGMT_C_MODULES,
+static struct genl_ops netlbl_mgmt_genl_c_protocols = {
+ .cmd = NLBL_MGMT_C_PROTOCOLS,
.flags = 0,
- .doit = netlbl_mgmt_modules,
- .dumpit = NULL,
+ .policy = netlbl_mgmt_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_mgmt_protocols,
};
static struct genl_ops netlbl_mgmt_genl_c_version = {
.cmd = NLBL_MGMT_C_VERSION,
.flags = 0,
+ .policy = netlbl_mgmt_genl_policy,
.doit = netlbl_mgmt_version,
.dumpit = NULL,
};
@@ -596,7 +607,7 @@ int netlbl_mgmt_genl_init(void)
if (ret_val != 0)
return ret_val;
ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
- &netlbl_mgmt_genl_c_list);
+ &netlbl_mgmt_genl_c_listall);
if (ret_val != 0)
return ret_val;
ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
@@ -612,7 +623,7 @@ int netlbl_mgmt_genl_init(void)
if (ret_val != 0)
return ret_val;
ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
- &netlbl_mgmt_genl_c_modules);
+ &netlbl_mgmt_genl_c_protocols);
if (ret_val != 0)
return ret_val;
ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index fd6c6acbfa08..3642d3bfc8eb 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -34,212 +34,137 @@
#include <net/netlabel.h>
/*
- * The following NetLabel payloads are supported by the management interface,
- * all of which are preceeded by the nlmsghdr struct.
- *
- * o ACK:
- * Sent by the kernel in response to an applications message, applications
- * should never send this message.
- *
- * +----------------------+-----------------------+
- * | seq number (32 bits) | return code (32 bits) |
- * +----------------------+-----------------------+
- *
- * seq number: the sequence number of the original message, taken from the
- * nlmsghdr structure
- * return code: return value, based on errno values
+ * The following NetLabel payloads are supported by the management interface.
*
* o ADD:
* Sent by an application to add a domain mapping to the NetLabel system.
- * The kernel should respond with an ACK.
- *
- * +-------------------+
- * | domains (32 bits) | ...
- * +-------------------+
- *
- * domains: the number of domains in the message
- *
- * +--------------------------+-------------------------+
- * | domain string (variable) | protocol type (32 bits) | ...
- * +--------------------------+-------------------------+
*
- * +-------------- ---- --- -- -
- * | mapping data ... repeated
- * +-------------- ---- --- -- -
+ * Required attributes:
*
- * domain string: the domain string, NULL terminated
- * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
- * mapping data: specific to the map type (see below)
+ * NLBL_MGMT_A_DOMAIN
+ * NLBL_MGMT_A_PROTOCOL
*
- * NETLBL_NLTYPE_UNLABELED
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
*
- * No mapping data for this protocol type.
+ * NLBL_MGMT_A_CV4DOI
*
- * NETLBL_NLTYPE_CIPSOV4
- *
- * +---------------+
- * | doi (32 bits) |
- * +---------------+
- *
- * doi: the CIPSO DOI value
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
*
* o REMOVE:
* Sent by an application to remove a domain mapping from the NetLabel
- * system. The kernel should ACK this message.
- *
- * +-------------------+
- * | domains (32 bits) | ...
- * +-------------------+
+ * system.
*
- * domains: the number of domains in the message
+ * Required attributes:
*
- * +--------------------------+
- * | domain string (variable) | ...
- * +--------------------------+
+ * NLBL_MGMT_A_DOMAIN
*
- * domain string: the domain string, NULL terminated
- *
- * o LIST:
+ * o LISTALL:
* This message can be sent either from an application or by the kernel in
- * response to an application generated LIST message. When sent by an
- * application there is no payload. The kernel should respond to a LIST
- * message either with a LIST message on success or an ACK message on
- * failure.
- *
- * +-------------------+
- * | domains (32 bits) | ...
- * +-------------------+
- *
- * domains: the number of domains in the message
+ * response to an application generated LISTALL message. When sent by an
+ * application there is no payload and the NLM_F_DUMP flag should be set.
+ * The kernel should respond with a series of the following messages.
*
- * +--------------------------+
- * | domain string (variable) | ...
- * +--------------------------+
+ * Required attributes:
*
- * +-------------------------+-------------- ---- --- -- -
- * | protocol type (32 bits) | mapping data ... repeated
- * +-------------------------+-------------- ---- --- -- -
+ * NLBL_MGMT_A_DOMAIN
+ * NLBL_MGMT_A_PROTOCOL
*
- * domain string: the domain string, NULL terminated
- * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
- * mapping data: specific to the map type (see below)
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
*
- * NETLBL_NLTYPE_UNLABELED
+ * NLBL_MGMT_A_CV4DOI
*
- * No mapping data for this protocol type.
- *
- * NETLBL_NLTYPE_CIPSOV4
- *
- * +----------------+---------------+
- * | type (32 bits) | doi (32 bits) |
- * +----------------+---------------+
- *
- * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
- * as CIPSO_V4_MAP_*)
- * doi: the CIPSO DOI value
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
*
* o ADDDEF:
* Sent by an application to set the default domain mapping for the NetLabel
- * system. The kernel should respond with an ACK.
+ * system.
*
- * +-------------------------+-------------- ---- --- -- -
- * | protocol type (32 bits) | mapping data ... repeated
- * +-------------------------+-------------- ---- --- -- -
+ * Required attributes:
*
- * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
- * mapping data: specific to the map type (see below)
+ * NLBL_MGMT_A_PROTOCOL
*
- * NETLBL_NLTYPE_UNLABELED
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
*
- * No mapping data for this protocol type.
+ * NLBL_MGMT_A_CV4DOI
*
- * NETLBL_NLTYPE_CIPSOV4
- *
- * +---------------+
- * | doi (32 bits) |
- * +---------------+
- *
- * doi: the CIPSO DOI value
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
*
* o REMOVEDEF:
* Sent by an application to remove the default domain mapping from the
- * NetLabel system, there is no payload. The kernel should ACK this message.
+ * NetLabel system, there is no payload.
*
* o LISTDEF:
* This message can be sent either from an application or by the kernel in
* response to an application generated LISTDEF message. When sent by an
- * application there is no payload. The kernel should respond to a
- * LISTDEF message either with a LISTDEF message on success or an ACK message
- * on failure.
- *
- * +-------------------------+-------------- ---- --- -- -
- * | protocol type (32 bits) | mapping data ... repeated
- * +-------------------------+-------------- ---- --- -- -
+ * application there is no payload. On success the kernel should send a
+ * response using the following format.
*
- * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
- * mapping data: specific to the map type (see below)
+ * Required attributes:
*
- * NETLBL_NLTYPE_UNLABELED
+ * NLBL_MGMT_A_PROTOCOL
*
- * No mapping data for this protocol type.
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
*
- * NETLBL_NLTYPE_CIPSOV4
+ * NLBL_MGMT_A_CV4DOI
*
- * +----------------+---------------+
- * | type (32 bits) | doi (32 bits) |
- * +----------------+---------------+
+ * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
*
- * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
- * as CIPSO_V4_MAP_*)
- * doi: the CIPSO DOI value
+ * o PROTOCOLS:
+ * Sent by an application to request a list of configured NetLabel protocols
+ * in the kernel. When sent by an application there is no payload and the
+ * NLM_F_DUMP flag should be set. The kernel should respond with a series of
+ * the following messages.
*
- * o MODULES:
- * Sent by an application to request a list of configured NetLabel modules
- * in the kernel. When sent by an application there is no payload.
+ * Required attributes:
*
- * +-------------------+
- * | modules (32 bits) | ...
- * +-------------------+
- *
- * modules: the number of modules in the message, if this is an application
- * generated message and the value is zero then return a list of
- * the configured modules
- *
- * +------------------+
- * | module (32 bits) | ... repeated
- * +------------------+
- *
- * module: the module number as defined by NETLBL_NLTYPE_*
+ * NLBL_MGMT_A_PROTOCOL
*
* o VERSION:
- * Sent by an application to request the NetLabel version string. When sent
- * by an application there is no payload. This message type is also used by
- * the kernel to respond to an VERSION request.
+ * Sent by an application to request the NetLabel version. When sent by an
+ * application there is no payload. This message type is also used by the
+ * kernel to respond to a VERSION request.
*
- * +-------------------+
- * | version (32 bits) |
- * +-------------------+
+ * Required attributes:
*
- * version: the protocol version number
+ * NLBL_MGMT_A_VERSION
*
*/
/* NetLabel Management commands */
enum {
NLBL_MGMT_C_UNSPEC,
- NLBL_MGMT_C_ACK,
NLBL_MGMT_C_ADD,
NLBL_MGMT_C_REMOVE,
- NLBL_MGMT_C_LIST,
+ NLBL_MGMT_C_LISTALL,
NLBL_MGMT_C_ADDDEF,
NLBL_MGMT_C_REMOVEDEF,
NLBL_MGMT_C_LISTDEF,
- NLBL_MGMT_C_MODULES,
+ NLBL_MGMT_C_PROTOCOLS,
NLBL_MGMT_C_VERSION,
__NLBL_MGMT_C_MAX,
};
#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
+/* NetLabel Management attributes */
+enum {
+ NLBL_MGMT_A_UNSPEC,
+ NLBL_MGMT_A_DOMAIN,
+ /* (NLA_NUL_STRING)
+ * the NULL terminated LSM domain string */
+ NLBL_MGMT_A_PROTOCOL,
+ /* (NLA_U32)
+ * the NetLabel protocol type (defined by NETLBL_NLTYPE_*) */
+ NLBL_MGMT_A_VERSION,
+ /* (NLA_U32)
+ * the NetLabel protocol version number (defined by
+ * NETLBL_PROTO_VERSION) */
+ NLBL_MGMT_A_CV4DOI,
+ /* (NLA_U32)
+ * the CIPSOv4 DOI value */
+ __NLBL_MGMT_A_MAX,
+};
+#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
+
/* NetLabel protocol functions */
int netlbl_mgmt_genl_init(void);
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 785f4960e0d3..440f5c4e1e2d 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -55,9 +55,13 @@ static struct genl_family netlbl_unlabel_gnl_family = {
.hdrsize = 0,
.name = NETLBL_NLTYPE_UNLABELED_NAME,
.version = NETLBL_PROTO_VERSION,
- .maxattr = 0,
+ .maxattr = NLBL_UNLABEL_A_MAX,
};
+/* NetLabel Netlink attribute policy */
+static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
+ [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 },
+};
/*
* NetLabel Command Handlers
@@ -75,31 +79,18 @@ static struct genl_family netlbl_unlabel_gnl_family = {
*/
static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val;
- struct nlattr *data = netlbl_netlink_payload_data(skb);
- u32 value;
-
- ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
- if (ret_val != 0)
- return ret_val;
+ int ret_val = -EINVAL;
+ u8 value;
- if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) {
- value = nla_get_u32(data);
+ if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
+ value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
if (value == 1 || value == 0) {
atomic_set(&netlabel_unlabel_accept_flg, value);
- netlbl_netlink_send_ack(info,
- netlbl_unlabel_gnl_family.id,
- NLBL_UNLABEL_C_ACK,
- NETLBL_E_OK);
- return 0;
+ ret_val = 0;
}
}
- netlbl_netlink_send_ack(info,
- netlbl_unlabel_gnl_family.id,
- NLBL_UNLABEL_C_ACK,
- EINVAL);
- return -EINVAL;
+ return ret_val;
}
/**
@@ -114,39 +105,39 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
*/
static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
{
- int ret_val = -ENOMEM;
+ int ret_val = -EINVAL;
struct sk_buff *ans_skb;
+ void *data;
- ans_skb = netlbl_netlink_alloc_skb(0,
- GENL_HDRLEN + NETLBL_LEN_U32,
- GFP_KERNEL);
+ ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (ans_skb == NULL)
goto list_failure;
-
- if (netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- 0,
- netlbl_unlabel_gnl_family.id,
- NLBL_UNLABEL_C_LIST) == NULL)
+ data = netlbl_netlink_hdr_put(ans_skb,
+ info->snd_pid,
+ info->snd_seq,
+ netlbl_unlabel_gnl_family.id,
+ 0,
+ NLBL_UNLABEL_C_LIST);
+ if (data == NULL) {
+ ret_val = -ENOMEM;
goto list_failure;
+ }
- ret_val = nla_put_u32(ans_skb,
- NLA_U32,
- atomic_read(&netlabel_unlabel_accept_flg));
+ ret_val = nla_put_u8(ans_skb,
+ NLBL_UNLABEL_A_ACPTFLG,
+ atomic_read(&netlabel_unlabel_accept_flg));
if (ret_val != 0)
goto list_failure;
- ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+ genlmsg_end(ans_skb, data);
+
+ ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
if (ret_val != 0)
goto list_failure;
-
return 0;
list_failure:
- netlbl_netlink_send_ack(info,
- netlbl_unlabel_gnl_family.id,
- NLBL_UNLABEL_C_ACK,
- -ret_val);
+ kfree(ans_skb);
return ret_val;
}
@@ -157,7 +148,8 @@ list_failure:
static struct genl_ops netlbl_unlabel_genl_c_accept = {
.cmd = NLBL_UNLABEL_C_ACCEPT,
- .flags = 0,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_unlabel_genl_policy,
.doit = netlbl_unlabel_accept,
.dumpit = NULL,
};
@@ -165,6 +157,7 @@ static struct genl_ops netlbl_unlabel_genl_c_accept = {
static struct genl_ops netlbl_unlabel_genl_c_list = {
.cmd = NLBL_UNLABEL_C_LIST,
.flags = 0,
+ .policy = netlbl_unlabel_genl_policy,
.doit = netlbl_unlabel_list,
.dumpit = NULL,
};
@@ -218,10 +211,8 @@ int netlbl_unlabel_genl_init(void)
*/
int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
{
- if (atomic_read(&netlabel_unlabel_accept_flg) == 1) {
- memset(secattr, 0, sizeof(*secattr));
- return 0;
- }
+ if (atomic_read(&netlabel_unlabel_accept_flg) == 1)
+ return netlbl_secattr_init(secattr);
return -ENOMSG;
}
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index f300e54e14b6..c2917fbb42cf 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -36,56 +36,47 @@
/*
* The following NetLabel payloads are supported by the Unlabeled subsystem.
*
- * o ACK:
- * Sent by the kernel in response to an applications message, applications
- * should never send this message.
- *
- * +----------------------+-----------------------+
- * | seq number (32 bits) | return code (32 bits) |
- * +----------------------+-----------------------+
- *
- * seq number: the sequence number of the original message, taken from the
- * nlmsghdr structure
- * return code: return value, based on errno values
- *
* o ACCEPT
* This message is sent from an application to specify if the kernel should
* allow unlabled packets to pass if they do not match any of the static
* mappings defined in the unlabeled module.
*
- * +-----------------+
- * | allow (32 bits) |
- * +-----------------+
+ * Required attributes:
*
- * allow: if true (1) then allow the packets to pass, if false (0) then
- * reject the packets
+ * NLBL_UNLABEL_A_ACPTFLG
*
* o LIST
* This message can be sent either from an application or by the kernel in
* response to an application generated LIST message. When sent by an
* application there is no payload. The kernel should respond to a LIST
- * message either with a LIST message on success or an ACK message on
- * failure.
+ * message with a LIST message on success.
*
- * +-----------------------+
- * | accept flag (32 bits) |
- * +-----------------------+
+ * Required attributes:
*
- * accept flag: if true (1) then unlabeled packets are allowed to pass,
- * if false (0) then unlabeled packets are rejected
+ * NLBL_UNLABEL_A_ACPTFLG
*
*/
/* NetLabel Unlabeled commands */
enum {
NLBL_UNLABEL_C_UNSPEC,
- NLBL_UNLABEL_C_ACK,
NLBL_UNLABEL_C_ACCEPT,
NLBL_UNLABEL_C_LIST,
__NLBL_UNLABEL_C_MAX,
};
#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
+/* NetLabel Unlabeled attributes */
+enum {
+ NLBL_UNLABEL_A_UNSPEC,
+ NLBL_UNLABEL_A_ACPTFLG,
+ /* (NLA_U8)
+ * if true then unlabeled packets are allowed to pass, else unlabeled
+ * packets are rejected */
+ __NLBL_UNLABEL_A_MAX,
+};
+#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1)
+
/* NetLabel protocol functions */
int netlbl_unlabel_genl_init(void);
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 73cbe66e42ff..eeb7d768d2bb 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -74,85 +74,3 @@ int netlbl_netlink_init(void)
return 0;
}
-
-/*
- * NetLabel Common Protocol Functions
- */
-
-/**
- * netlbl_netlink_send_ack - Send an ACK message
- * @info: the generic NETLINK information
- * @genl_family: the generic NETLINK family ID value
- * @ack_cmd: the generic NETLINK family ACK command value
- * @ret_code: return code to use
- *
- * Description:
- * This function sends an ACK message to the sender of the NETLINK message
- * specified by @info.
- *
- */
-void netlbl_netlink_send_ack(const struct genl_info *info,
- u32 genl_family,
- u8 ack_cmd,
- u32 ret_code)
-{
- size_t data_size;
- struct sk_buff *skb;
-
- data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32;
- skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
- if (skb == NULL)
- return;
-
- if (netlbl_netlink_hdr_put(skb,
- info->snd_pid,
- 0,
- genl_family,
- ack_cmd) == NULL)
- goto send_ack_failure;
-
- if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0)
- goto send_ack_failure;
- if (nla_put_u32(skb, NLA_U32, ret_code) != 0)
- goto send_ack_failure;
-
- netlbl_netlink_snd(skb, info->snd_pid);
- return;
-
-send_ack_failure:
- kfree_skb(skb);
-}
-
-/*
- * NETLINK I/O Functions
- */
-
-/**
- * netlbl_netlink_snd - Send a NetLabel message
- * @skb: NetLabel message
- * @pid: destination PID
- *
- * Description:
- * Sends a unicast NetLabel message over the NETLINK socket.
- *
- */
-int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
-{
- return genlmsg_unicast(skb, pid);
-}
-
-/**
- * netlbl_netlink_snd - Send a NetLabel message
- * @skb: NetLabel message
- * @pid: sending PID
- * @group: multicast group id
- *
- * Description:
- * Sends a multicast NetLabel message over the NETLINK socket to all members
- * of @group except @pid.
- *
- */
-int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
-{
- return genlmsg_multicast(skb, pid, group, GFP_KERNEL);
-}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 385a6c7488c6..3f9386b917df 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -41,72 +41,6 @@
/* NetLabel NETLINK helper functions */
/**
- * netlbl_netlink_cap_check - Check the NETLINK msg capabilities
- * @skb: the NETLINK buffer
- * @req_cap: the required capability
- *
- * Description:
- * Check the NETLINK buffer's capabilities against the required capabilities.
- * Returns zero on success, negative values on failure.
- *
- */
-static inline int netlbl_netlink_cap_check(const struct sk_buff *skb,
- kernel_cap_t req_cap)
-{
- if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap))
- return 0;
- return -EPERM;
-}
-
-/**
- * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on
- * @nla: the attribute
- * @rem_len: remaining length
- *
- * Description:
- * Return a u8 value pointed to by @nla and advance it to the next attribute.
- *
- */
-static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len)
-{
- u8 val = nla_get_u8(*nla);
- *nla = nla_next(*nla, rem_len);
- return val;
-}
-
-/**
- * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on
- * @nla: the attribute
- * @rem_len: remaining length
- *
- * Description:
- * Return a u16 value pointed to by @nla and advance it to the next attribute.
- *
- */
-static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len)
-{
- u16 val = nla_get_u16(*nla);
- *nla = nla_next(*nla, rem_len);
- return val;
-}
-
-/**
- * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on
- * @nla: the attribute
- * @rem_len: remaining length
- *
- * Description:
- * Return a u32 value pointed to by @nla and advance it to the next attribute.
- *
- */
-static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len)
-{
- u32 val = nla_get_u32(*nla);
- *nla = nla_next(*nla, rem_len);
- return val;
-}
-
-/**
* netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
* @skb: the packet
* @pid: the PID of the receipient
@@ -124,6 +58,7 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
u32 pid,
u32 seq,
int type,
+ int flags,
u8 cmd)
{
return genlmsg_put(skb,
@@ -131,85 +66,13 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
seq,
type,
0,
- 0,
+ flags,
cmd,
NETLBL_PROTO_VERSION);
}
-/**
- * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff
- * @skb: the packet
- * @pid: the PID of the receipient
- * @seq: the sequence number
- * @type: the generic NETLINK message family type
- * @cmd: command
- *
- * Description:
- * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
- * struct to the packet.
- *
- */
-static inline void netlbl_netlink_hdr_push(struct sk_buff *skb,
- u32 pid,
- u32 seq,
- int type,
- u8 cmd)
-
-{
- struct nlmsghdr *nlh;
- struct genlmsghdr *hdr;
-
- nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN));
- nlh->nlmsg_type = type;
- nlh->nlmsg_len = skb->len;
- nlh->nlmsg_flags = 0;
- nlh->nlmsg_pid = pid;
- nlh->nlmsg_seq = seq;
-
- hdr = nlmsg_data(nlh);
- hdr->cmd = cmd;
- hdr->version = NETLBL_PROTO_VERSION;
- hdr->reserved = 0;
-}
-
-/**
- * netlbl_netlink_payload_len - Return the length of the payload
- * @skb: the NETLINK buffer
- *
- * Description:
- * This function returns the length of the NetLabel payload.
- *
- */
-static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb)
-{
- return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN;
-}
-
-/**
- * netlbl_netlink_payload_data - Returns a pointer to the start of the payload
- * @skb: the NETLINK buffer
- *
- * Description:
- * This function returns a pointer to the start of the NetLabel payload.
- *
- */
-static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb)
-{
- return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) +
- GENL_HDRLEN;
-}
-
-/* NetLabel common protocol functions */
-
-void netlbl_netlink_send_ack(const struct genl_info *info,
- u32 genl_family,
- u8 ack_cmd,
- u32 ret_code);
-
/* NetLabel NETLINK I/O functions */
int netlbl_netlink_init(void);
-int netlbl_netlink_snd(struct sk_buff *skb, u32 pid);
-int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group);
#endif
diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
index 814ddc42f1f4..293dbd6246c1 100644
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -124,3 +124,40 @@ config SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT
If you are unsure what do do here, select N.
+config SECURITY_SELINUX_POLICYDB_VERSION_MAX
+ bool "NSA SELinux maximum supported policy format version"
+ depends on SECURITY_SELINUX
+ default n
+ help
+ This option enables the maximum policy format version supported
+ by SELinux to be set to a particular value. This value is reported
+ to userspace via /selinux/policyvers and used at policy load time.
+ It can be adjusted downward to support legacy userland (init) that
+ does not correctly handle kernels that support newer policy versions.
+
+ Examples:
+ For the Fedora Core 3 or 4 Linux distributions, enable this option
+ and set the value via the next option. For Fedora Core 5 and later,
+ do not enable this option.
+
+ If you are unsure how to answer this question, answer N.
+
+config SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
+ int "NSA SELinux maximum supported policy format version value"
+ depends on SECURITY_SELINUX_POLICYDB_VERSION_MAX
+ range 15 21
+ default 19
+ help
+ This option sets the value for the maximum policy format version
+ supported by SELinux.
+
+ Examples:
+ For Fedora Core 3, use 18.
+ For Fedora Core 4, use 19.
+
+ If you are unsure how to answer this question, look for the
+ policy format version supported by your policy toolchain, by
+ running 'checkpolicy -V'. Or look at what policy you have
+ installed under /etc/selinux/$SELINUXTYPE/policy, where
+ SELINUXTYPE is defined in your /etc/selinux/config.
+
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 9d7737db5e51..b6f96943be1f 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -21,19 +21,10 @@
#include "security.h"
#include "objsec.h"
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
{
- struct task_security_struct *tsec = tsk->security;
if (selinux_enabled)
- *ctxid = tsec->sid;
- else
- *ctxid = 0;
-}
-
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
-{
- if (selinux_enabled)
- return security_sid_to_context(ctxid, ctx, ctxlen);
+ return security_sid_to_context(sid, ctx, ctxlen);
else {
*ctx = NULL;
*ctxlen = 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5a66c4c09f7a..e4d81a42fca4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -51,7 +51,6 @@
#include <net/ip.h> /* for sysctl_local_port_range[] */
#include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */
#include <asm/uaccess.h>
-#include <asm/semaphore.h>
#include <asm/ioctls.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
@@ -71,6 +70,7 @@
#include <linux/audit.h>
#include <linux/string.h>
#include <linux/selinux.h>
+#include <linux/mutex.h>
#include "avc.h"
#include "objsec.h"
@@ -185,7 +185,7 @@ static int inode_alloc_security(struct inode *inode)
return -ENOMEM;
memset(isec, 0, sizeof(*isec));
- init_MUTEX(&isec->sem);
+ mutex_init(&isec->lock);
INIT_LIST_HEAD(&isec->list);
isec->inode = inode;
isec->sid = SECINITSID_UNLABELED;
@@ -242,7 +242,7 @@ static int superblock_alloc_security(struct super_block *sb)
if (!sbsec)
return -ENOMEM;
- init_MUTEX(&sbsec->sem);
+ mutex_init(&sbsec->lock);
INIT_LIST_HEAD(&sbsec->list);
INIT_LIST_HEAD(&sbsec->isec_head);
spin_lock_init(&sbsec->isec_lock);
@@ -594,7 +594,7 @@ static int superblock_doinit(struct super_block *sb, void *data)
struct inode *inode = root->d_inode;
int rc = 0;
- down(&sbsec->sem);
+ mutex_lock(&sbsec->lock);
if (sbsec->initialized)
goto out;
@@ -689,7 +689,7 @@ next_inode:
}
spin_unlock(&sbsec->isec_lock);
out:
- up(&sbsec->sem);
+ mutex_unlock(&sbsec->lock);
return rc;
}
@@ -843,15 +843,13 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
char *context = NULL;
unsigned len = 0;
int rc = 0;
- int hold_sem = 0;
if (isec->initialized)
goto out;
- down(&isec->sem);
- hold_sem = 1;
+ mutex_lock(&isec->lock);
if (isec->initialized)
- goto out;
+ goto out_unlock;
sbsec = inode->i_sb->s_security;
if (!sbsec->initialized) {
@@ -862,7 +860,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
if (list_empty(&isec->list))
list_add(&isec->list, &sbsec->isec_head);
spin_unlock(&sbsec->isec_lock);
- goto out;
+ goto out_unlock;
}
switch (sbsec->behavior) {
@@ -885,7 +883,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
printk(KERN_WARNING "%s: no dentry for dev=%s "
"ino=%ld\n", __FUNCTION__, inode->i_sb->s_id,
inode->i_ino);
- goto out;
+ goto out_unlock;
}
len = INITCONTEXTLEN;
@@ -893,7 +891,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
if (!context) {
rc = -ENOMEM;
dput(dentry);
- goto out;
+ goto out_unlock;
}
rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
context, len);
@@ -903,7 +901,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
NULL, 0);
if (rc < 0) {
dput(dentry);
- goto out;
+ goto out_unlock;
}
kfree(context);
len = rc;
@@ -911,7 +909,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
if (!context) {
rc = -ENOMEM;
dput(dentry);
- goto out;
+ goto out_unlock;
}
rc = inode->i_op->getxattr(dentry,
XATTR_NAME_SELINUX,
@@ -924,7 +922,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
"%d for dev=%s ino=%ld\n", __FUNCTION__,
-rc, inode->i_sb->s_id, inode->i_ino);
kfree(context);
- goto out;
+ goto out_unlock;
}
/* Map ENODATA to the default file SID */
sid = sbsec->def_sid;
@@ -960,7 +958,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
isec->sclass,
&sid);
if (rc)
- goto out;
+ goto out_unlock;
isec->sid = sid;
break;
case SECURITY_FS_USE_MNTPOINT:
@@ -978,7 +976,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
isec->sclass,
&sid);
if (rc)
- goto out;
+ goto out_unlock;
isec->sid = sid;
}
}
@@ -987,12 +985,11 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
isec->initialized = 1;
+out_unlock:
+ mutex_unlock(&isec->lock);
out:
if (isec->sclass == SECCLASS_FILE)
isec->sclass = inode_mode_to_security_class(inode->i_mode);
-
- if (hold_sem)
- up(&isec->sem);
return rc;
}
@@ -1364,25 +1361,6 @@ static inline u32 file_to_av(struct file *file)
return av;
}
-/* Set an inode's SID to a specified value. */
-static int inode_security_set_sid(struct inode *inode, u32 sid)
-{
- struct inode_security_struct *isec = inode->i_security;
- struct superblock_security_struct *sbsec = inode->i_sb->s_security;
-
- if (!sbsec->initialized) {
- /* Defer initialization to selinux_complete_init. */
- return 0;
- }
-
- down(&isec->sem);
- isec->sclass = inode_mode_to_security_class(inode->i_mode);
- isec->sid = sid;
- isec->initialized = 1;
- up(&isec->sem);
- return 0;
-}
-
/* Hook functions begin here. */
static int selinux_ptrace(struct task_struct *parent, struct task_struct *child)
@@ -1711,10 +1689,12 @@ static inline void flush_unauthorized_files(struct files_struct * files)
{
struct avc_audit_data ad;
struct file *file, *devnull = NULL;
- struct tty_struct *tty = current->signal->tty;
+ struct tty_struct *tty;
struct fdtable *fdt;
long j = -1;
+ mutex_lock(&tty_mutex);
+ tty = current->signal->tty;
if (tty) {
file_list_lock();
file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -1734,6 +1714,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
}
file_list_unlock();
}
+ mutex_unlock(&tty_mutex);
/* Revalidate access to inherited open files. */
@@ -2091,7 +2072,13 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
}
}
- inode_security_set_sid(inode, newsid);
+ /* Possibly defer initialization to selinux_complete_init. */
+ if (sbsec->initialized) {
+ struct inode_security_struct *isec = inode->i_security;
+ isec->sclass = inode_mode_to_security_class(inode->i_mode);
+ isec->sid = newsid;
+ isec->initialized = 1;
+ }
if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT)
return -EOPNOTSUPP;
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 0a39bfd1319f..ef2267fea8bd 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -44,7 +44,7 @@ struct inode_security_struct {
u32 sid; /* SID of this object */
u16 sclass; /* security class of this object */
unsigned char initialized; /* initialization flag */
- struct semaphore sem;
+ struct mutex lock;
unsigned char inherit; /* inherit SID from parent entry */
};
@@ -63,7 +63,7 @@ struct superblock_security_struct {
unsigned int behavior; /* labeling behavior */
unsigned char initialized; /* initialization flag */
unsigned char proc; /* proc fs */
- struct semaphore sem;
+ struct mutex lock;
struct list_head isec_head;
spinlock_t isec_lock;
};
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 911954a692fa..1ef79172cc8c 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -24,10 +24,15 @@
#define POLICYDB_VERSION_VALIDATETRANS 19
#define POLICYDB_VERSION_MLS 19
#define POLICYDB_VERSION_AVTAB 20
+#define POLICYDB_VERSION_RANGETRANS 21
/* Range of policy versions we understand*/
#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE
-#define POLICYDB_VERSION_MAX POLICYDB_VERSION_AVTAB
+#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
+#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
+#else
+#define POLICYDB_VERSION_MAX POLICYDB_VERSION_RANGETRANS
+#endif
extern int selinux_enabled;
extern int selinux_mls_enabled;
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 119bd6078ba1..c713af23250a 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -530,22 +530,21 @@ int mls_compute_sid(struct context *scontext,
u32 specified,
struct context *newcontext)
{
+ struct range_trans *rtr;
+
if (!selinux_mls_enabled)
return 0;
switch (specified) {
case AVTAB_TRANSITION:
- if (tclass == SECCLASS_PROCESS) {
- struct range_trans *rangetr;
- /* Look for a range transition rule. */
- for (rangetr = policydb.range_tr; rangetr;
- rangetr = rangetr->next) {
- if (rangetr->dom == scontext->type &&
- rangetr->type == tcontext->type) {
- /* Set the range from the rule */
- return mls_range_set(newcontext,
- &rangetr->range);
- }
+ /* Look for a range transition rule. */
+ for (rtr = policydb.range_tr; rtr; rtr = rtr->next) {
+ if (rtr->source_type == scontext->type &&
+ rtr->target_type == tcontext->type &&
+ rtr->target_class == tclass) {
+ /* Set the range from the rule */
+ return mls_range_set(newcontext,
+ &rtr->target_range);
}
}
/* Fallthrough */
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index f03960e697ce..b18895302555 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -96,6 +96,11 @@ static struct policydb_compat_info policydb_compat[] = {
.sym_num = SYM_NUM,
.ocon_num = OCON_NUM,
},
+ {
+ .version = POLICYDB_VERSION_RANGETRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
+ },
};
static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -645,15 +650,15 @@ void policydb_destroy(struct policydb *p)
for (rt = p->range_tr; rt; rt = rt -> next) {
if (lrt) {
- ebitmap_destroy(&lrt->range.level[0].cat);
- ebitmap_destroy(&lrt->range.level[1].cat);
+ ebitmap_destroy(&lrt->target_range.level[0].cat);
+ ebitmap_destroy(&lrt->target_range.level[1].cat);
kfree(lrt);
}
lrt = rt;
}
if (lrt) {
- ebitmap_destroy(&lrt->range.level[0].cat);
- ebitmap_destroy(&lrt->range.level[1].cat);
+ ebitmap_destroy(&lrt->target_range.level[0].cat);
+ ebitmap_destroy(&lrt->target_range.level[1].cat);
kfree(lrt);
}
@@ -1829,6 +1834,7 @@ int policydb_read(struct policydb *p, void *fp)
}
if (p->policyvers >= POLICYDB_VERSION_MLS) {
+ int new_rangetr = p->policyvers >= POLICYDB_VERSION_RANGETRANS;
rc = next_entry(buf, fp, sizeof(u32));
if (rc < 0)
goto bad;
@@ -1847,9 +1853,16 @@ int policydb_read(struct policydb *p, void *fp)
rc = next_entry(buf, fp, (sizeof(u32) * 2));
if (rc < 0)
goto bad;
- rt->dom = le32_to_cpu(buf[0]);
- rt->type = le32_to_cpu(buf[1]);
- rc = mls_read_range_helper(&rt->range, fp);
+ rt->source_type = le32_to_cpu(buf[0]);
+ rt->target_type = le32_to_cpu(buf[1]);
+ if (new_rangetr) {
+ rc = next_entry(buf, fp, sizeof(u32));
+ if (rc < 0)
+ goto bad;
+ rt->target_class = le32_to_cpu(buf[0]);
+ } else
+ rt->target_class = SECCLASS_PROCESS;
+ rc = mls_read_range_helper(&rt->target_range, fp);
if (rc)
goto bad;
lrt = rt;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index b1340711f721..8319d5ff5944 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -106,9 +106,10 @@ struct cat_datum {
};
struct range_trans {
- u32 dom; /* current process domain */
- u32 type; /* program executable type */
- struct mls_range range; /* new range */
+ u32 source_type;
+ u32 target_type;
+ u32 target_class;
+ struct mls_range target_range;
struct range_trans *next;
};
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 7eb69a602d8f..0c219a1b3243 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2003,7 +2003,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr,
return rc;
}
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx)
{
@@ -2026,11 +2026,11 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
goto out;
}
- ctxt = sidtab_search(&sidtab, ctxid);
+ ctxt = sidtab_search(&sidtab, sid);
if (!ctxt) {
audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR,
"selinux_audit_rule_match: unrecognized SID %d\n",
- ctxid);
+ sid);
match = -ENOENT;
goto out;
}
@@ -2502,14 +2502,24 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
{
struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
struct sk_security_struct *sksec = sk->sk_security;
+ struct netlbl_lsm_secattr secattr;
+ u32 nlbl_peer_sid;
sksec->sclass = isec->sclass;
if (sk->sk_family != PF_INET)
return;
+ netlbl_secattr_init(&secattr);
+ if (netlbl_sock_getattr(sk, &secattr) == 0 &&
+ selinux_netlbl_secattr_to_sid(NULL,
+ &secattr,
+ sksec->sid,
+ &nlbl_peer_sid) == 0)
+ sksec->peer_sid = nlbl_peer_sid;
+ netlbl_secattr_destroy(&secattr, 0);
+
sksec->nlbl_state = NLBL_REQUIRE;
- sksec->peer_sid = sksec->sid;
/* Try to set the NetLabel on the socket to save time later, if we fail
* here we will pick up the pieces in later calls to
@@ -2568,7 +2578,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask)
sock = SOCKET_I(inode);
isec = inode->i_security;
sksec = sock->sk->sk_security;
- down(&isec->sem);
+ mutex_lock(&isec->lock);
if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
(mask & (MAY_WRITE | MAY_APPEND)))) {
lock_sock(sock->sk);
@@ -2576,7 +2586,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask)
release_sock(sock->sk);
} else
rc = 0;
- up(&isec->sem);
+ mutex_unlock(&isec->lock);
return rc;
}
@@ -2601,7 +2611,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
u32 netlbl_sid;
u32 recv_perm;
- rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid);
+ rc = selinux_netlbl_skbuff_getsid(skb, SECINITSID_NETMSG, &netlbl_sid);
if (rc != 0)
return rc;
@@ -2610,13 +2620,13 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
switch (sksec->sclass) {
case SECCLASS_UDP_SOCKET:
- recv_perm = UDP_SOCKET__RECV_MSG;
+ recv_perm = UDP_SOCKET__RECVFROM;
break;
case SECCLASS_TCP_SOCKET:
- recv_perm = TCP_SOCKET__RECV_MSG;
+ recv_perm = TCP_SOCKET__RECVFROM;
break;
default:
- recv_perm = RAWIP_SOCKET__RECV_MSG;
+ recv_perm = RAWIP_SOCKET__RECVFROM;
}
rc = avc_has_perm(sksec->sid,
diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c
index 4cdb86252d67..219795171c71 100644
--- a/sound/oss/au1550_ac97.c
+++ b/sound/oss/au1550_ac97.c
@@ -719,8 +719,7 @@ prog_dmabuf_dac(struct au1550_state *s)
}
-static void
-dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static void dac_dma_interrupt(int irq, void *dev_id)
{
struct au1550_state *s = (struct au1550_state *) dev_id;
struct dmabuf *db = &s->dma_dac;
@@ -754,8 +753,7 @@ dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
}
-static void
-adc_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static void adc_dma_interrupt(int irq, void *dev_id)
{
struct au1550_state *s = (struct au1550_state *)dev_id;
struct dmabuf *dp = &s->dma_adc;
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index 2bd8e40b8541..be0bd503f013 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -755,7 +755,7 @@ static struct snd_pcm_ops snd_amd7930_capture_ops = {
.pointer = snd_amd7930_capture_pointer,
};
-static int __init snd_amd7930_pcm(struct snd_amd7930 *amd)
+static int __devinit snd_amd7930_pcm(struct snd_amd7930 *amd)
{
struct snd_pcm *pcm;
int err;
@@ -870,7 +870,7 @@ static int snd_amd7930_put_volume(struct snd_kcontrol *kctl, struct snd_ctl_elem
return change;
}
-static struct snd_kcontrol_new amd7930_controls[] __initdata = {
+static struct snd_kcontrol_new amd7930_controls[] __devinitdata = {
{
.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
.name = "Monitor Volume",
@@ -900,7 +900,7 @@ static struct snd_kcontrol_new amd7930_controls[] __initdata = {
},
};
-static int __init snd_amd7930_mixer(struct snd_amd7930 *amd)
+static int __devinit snd_amd7930_mixer(struct snd_amd7930 *amd)
{
struct snd_card *card;
int idx, err;
@@ -945,11 +945,11 @@ static struct snd_device_ops snd_amd7930_dev_ops = {
.dev_free = snd_amd7930_dev_free,
};
-static int __init snd_amd7930_create(struct snd_card *card,
- struct resource *rp,
- unsigned int reg_size,
- int irq, int dev,
- struct snd_amd7930 **ramd)
+static int __devinit snd_amd7930_create(struct snd_card *card,
+ struct resource *rp,
+ unsigned int reg_size,
+ int irq, int dev,
+ struct snd_amd7930 **ramd)
{
unsigned long flags;
struct snd_amd7930 *amd;
@@ -1013,7 +1013,7 @@ static int __init snd_amd7930_create(struct snd_card *card,
return 0;
}
-static int __init amd7930_attach_common(struct resource *rp, int irq)
+static int __devinit amd7930_attach_common(struct resource *rp, int irq)
{
static int dev_num;
struct snd_card *card;
@@ -1065,7 +1065,7 @@ out_err:
return err;
}
-static int __init amd7930_obio_attach(struct device_node *dp)
+static int __devinit amd7930_obio_attach(struct device_node *dp)
{
struct linux_prom_registers *regs;
struct linux_prom_irqs *irqp;