diff options
223 files changed, 7096 insertions, 4686 deletions
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt index c80e8a3402f0..c280a0e6f42d 100644 --- a/Documentation/devicetree/bindings/dma/atmel-dma.txt +++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt @@ -24,8 +24,11 @@ The three cells in order are: 1. A phandle pointing to the DMA controller. 2. The memory interface (16 most significant bits), the peripheral interface (16 less significant bits). -3. The peripheral identifier for the hardware handshaking interface. The -identifier can be different for tx and rx. +3. Parameters for the at91 DMA configuration register which are device +dependant: + - bit 7-0: peripheral identifier for the hardware handshaking interface. The + identifier can be different for tx and rx. + - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 1 for ASAP. Example: diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-dma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-dma.txt new file mode 100644 index 000000000000..2717ecb47db9 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/fsl-imx-dma.txt @@ -0,0 +1,48 @@ +* Freescale Direct Memory Access (DMA) Controller for i.MX + +This document will only describe differences to the generic DMA Controller and +DMA request bindings as described in dma/dma.txt . + +* DMA controller + +Required properties: +- compatible : Should be "fsl,<chip>-dma". chip can be imx1, imx21 or imx27 +- reg : Should contain DMA registers location and length +- interrupts : First item should be DMA interrupt, second one is optional and + should contain DMA Error interrupt +- #dma-cells : Has to be 1. imx-dma does not support anything else. + +Optional properties: +- #dma-channels : Number of DMA channels supported. Should be 16. +- #dma-requests : Number of DMA requests supported. + +Example: + + dma: dma@10001000 { + compatible = "fsl,imx27-dma"; + reg = <0x10001000 0x1000>; + interrupts = <32 33>; + #dma-cells = <1>; + #dma-channels = <16>; + }; + + +* DMA client + +Clients have to specify the DMA requests with phandles in a list. + +Required properties: +- dmas: List of one or more DMA request specifiers. One DMA request specifier + consists of a phandle to the DMA controller followed by the integer + specifiying the request line. +- dma-names: List of string identifiers for the DMA requests. For the correct + names, have a look at the specific client driver. + +Example: + + sdhci1: sdhci@10013000 { + ... + dmas = <&dma 7>; + dma-names = "rx-tx"; + ... + }; diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt index d1e3f443e205..68cee4f5539f 100644 --- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt +++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt @@ -4,14 +4,70 @@ Required properties: - compatible : Should be "fsl,<chip>-sdma" - reg : Should contain SDMA registers location and length - interrupts : Should contain SDMA interrupt +- #dma-cells : Must be <3>. + The first cell specifies the DMA request/event ID. See details below + about the second and third cell. - fsl,sdma-ram-script-name : Should contain the full path of SDMA RAM scripts firmware +The second cell of dma phandle specifies the peripheral type of DMA transfer. +The full ID of peripheral types can be found below. + + ID transfer type + --------------------- + 0 MCU domain SSI + 1 Shared SSI + 2 MMC + 3 SDHC + 4 MCU domain UART + 5 Shared UART + 6 FIRI + 7 MCU domain CSPI + 8 Shared CSPI + 9 SIM + 10 ATA + 11 CCM + 12 External peripheral + 13 Memory Stick Host Controller + 14 Shared Memory Stick Host Controller + 15 DSP + 16 Memory + 17 FIFO type Memory + 18 SPDIF + 19 IPU Memory + 20 ASRC + 21 ESAI + +The third cell specifies the transfer priority as below. + + ID transfer priority + ------------------------- + 0 High + 1 Medium + 2 Low + Examples: sdma@83fb0000 { compatible = "fsl,imx51-sdma", "fsl,imx35-sdma"; reg = <0x83fb0000 0x4000>; interrupts = <6>; + #dma-cells = <3>; fsl,sdma-ram-script-name = "sdma-imx51.bin"; }; + +DMA clients connected to the i.MX SDMA controller must use the format +described in the dma.txt file. + +Examples: + +ssi2: ssi@70014000 { + compatible = "fsl,imx51-ssi", "fsl,imx21-ssi"; + reg = <0x70014000 0x4000>; + interrupts = <30>; + clocks = <&clks 49>; + dmas = <&sdma 24 1 0>, + <&sdma 25 1 0>; + dma-names = "rx", "tx"; + fsl,fifo-depth = <15>; +}; diff --git a/Documentation/devicetree/bindings/dma/shdma.txt b/Documentation/devicetree/bindings/dma/shdma.txt new file mode 100644 index 000000000000..c15994aa1939 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/shdma.txt @@ -0,0 +1,75 @@ +* SHDMA Device Tree bindings + +Sh-/r-mobile and r-car systems often have multiple identical DMA controller +instances, capable of serving any of a common set of DMA slave devices, using +the same configuration. To describe this topology we require all compatible +SHDMA DT nodes to be placed under a DMA multiplexer node. All such compatible +DMAC instances have the same number of channels and use the same DMA +descriptors. Therefore respective DMA DT bindings can also all be placed in the +multiplexer node. Even if there is only one such DMAC instance on a system, it +still has to be placed under such a multiplexer node. + +* DMA multiplexer + +Required properties: +- compatible: should be "renesas,shdma-mux" +- #dma-cells: should be <1>, see "dmas" property below + +Optional properties (currently unused): +- dma-channels: number of DMA channels +- dma-requests: number of DMA request signals + +* DMA controller + +Required properties: +- compatible: should be "renesas,shdma" + +Example: + dmac: dma-mux0 { + compatible = "renesas,shdma-mux"; + #dma-cells = <1>; + dma-channels = <6>; + dma-requests = <256>; + reg = <0 0>; /* Needed for AUXDATA */ + #address-cells = <1>; + #size-cells = <1>; + ranges; + + dma0: shdma@fe008020 { + compatible = "renesas,shdma"; + reg = <0xfe008020 0x270>, + <0xfe009000 0xc>; + interrupt-parent = <&gic>; + interrupts = <0 34 4 + 0 28 4 + 0 29 4 + 0 30 4 + 0 31 4 + 0 32 4 + 0 33 4>; + interrupt-names = "error", + "ch0", "ch1", "ch2", "ch3", + "ch4", "ch5"; + }; + + dma1: shdma@fe018020 { + ... + }; + + dma2: shdma@fe028020 { + ... + }; + }; + +* DMA client + +Required properties: +- dmas: a list of <[DMA multiplexer phandle] [MID/RID value]> pairs, + where MID/RID values are fixed handles, specified in the SoC + manual +- dma-names: a list of DMA channel names, one per "dmas" entry + +Example: + dmas = <&dmac 0xd1 + &dmac 0xd2>; + dma-names = "tx", "rx"; diff --git a/Documentation/devicetree/bindings/metag/meta.txt b/Documentation/devicetree/bindings/metag/meta.txt new file mode 100644 index 000000000000..f4457f57ab08 --- /dev/null +++ b/Documentation/devicetree/bindings/metag/meta.txt @@ -0,0 +1,30 @@ +* Meta Processor Binding + +This binding specifies what properties must be available in the device tree +representation of a Meta Processor Core, which is the root node in the tree. + +Required properties: + + - compatible: Specifies the compatibility list for the Meta processor. + The type shall be <string> and the value shall include "img,meta". + +Optional properties: + + - clocks: Clock consumer specifiers as described in + Documentation/devicetree/bindings/clock/clock-bindings.txt + + - clock-names: Clock consumer names as described in + Documentation/devicetree/bindings/clock/clock-bindings.txt. + +Clocks are identified by name. Valid clocks are: + + - "core": The Meta core clock from which the Meta timers are derived. + +* Examples + +/ { + compatible = "toumaz,tz1090", "img,meta"; + + clocks = <&meta_core_clk>; + clock-names = "core"; +}; diff --git a/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt new file mode 100644 index 000000000000..b2d07ad90e9a --- /dev/null +++ b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt @@ -0,0 +1,33 @@ +TI-NSPIRE timer + +Required properties: + +- compatible : should be "lsi,zevio-timer". +- reg : The physical base address and size of the timer (always first). +- clocks: phandle to the source clock. + +Optional properties: + +- interrupts : The interrupt number of the first timer. +- reg : The interrupt acknowledgement registers + (always after timer base address) + +If any of the optional properties are not given, the timer is added as a +clock-source only. + +Example: + +timer { + compatible = "lsi,zevio-timer"; + reg = <0x900D0000 0x1000>, <0x900A0020 0x8>; + interrupts = <19>; + clocks = <&timer_clk>; +}; + +Example (no clock-events): + +timer { + compatible = "lsi,zevio-timer"; + reg = <0x900D0000 0x1000>; + clocks = <&timer_clk>; +}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index db0457d61682..948f61561ffa 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -58,6 +58,7 @@ snps Synopsys, Inc. st STMicroelectronics ste ST-Ericsson stericsson ST-Ericsson +toumaz Toumaz ti Texas Instruments toshiba Toshiba Corporation v3 V3 Semiconductor diff --git a/MAINTAINERS b/MAINTAINERS index 50105f98cb68..97762ad25300 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7057,8 +7057,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar <viresh.linux@gmail.com> S: Maintained F: include/linux/dw_dmac.h -F: drivers/dma/dw_dmac_regs.h -F: drivers/dma/dw_dmac.c +F: drivers/dma/dw/ SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Seungwon Jeon <tgih.jun@samsung.com> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 531cdda016f9..5ef7af01373a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 195451bf7706..6a8acb01b1d3 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -736,7 +736,7 @@ dcp@80028000 { reg = <0x80028000 0x2000>; interrupts = <52 53 54>; - status = "disabled"; + compatible = "fsl-dcp"; }; pxp@8002a000 { diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index ddc740769601..023ee63827a2 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -28,8 +28,8 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/hardware/arm_timer.h> #include <asm/hardware/timer-sp.h> diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h index 3d520ddca61b..2389b71a8e7c 100644 --- a/arch/arm/include/asm/sched_clock.h +++ b/arch/arm/include/asm/sched_clock.h @@ -1,16 +1,4 @@ -/* - * sched_clock.h: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. +/* You shouldn't include this file. Use linux/sched_clock.h instead. + * Temporary file until all asm/sched_clock.h users are gone */ -#ifndef ASM_SCHED_CLOCK -#define ASM_SCHED_CLOCK - -extern void sched_clock_postinit(void); -extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); - -extern unsigned long long (*sched_clock_func)(void); - -#endif +#include <linux/sched_clock.h> diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 799f42ecca63..7704e28c3483 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -47,6 +47,7 @@ unsigned long HYPERVISOR_hvm_op(int op, void *arg); int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); +int HYPERVISOR_tmem_op(void *arg); static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index fccfbdb03df1..86d10dd47dc4 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o sched_clock.o \ + process.o ptrace.o return_address.o \ setup.o signal.o stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index 59dcdced6e30..221f07b11ccb 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -11,9 +11,9 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/sched_clock.h> #include <asm/delay.h> -#include <asm/sched_clock.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index abff4e9aaee0..98aee3258398 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -24,9 +24,9 @@ #include <linux/timer.h> #include <linux/clocksource.h> #include <linux/irq.h> +#include <linux/sched_clock.h> #include <asm/thread_info.h> -#include <asm/sched_clock.h> #include <asm/stacktrace.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> @@ -120,6 +120,4 @@ void __init time_init(void) machine_desc->init_time(); else clocksource_of_init(); - - sched_clock_postinit(); } diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index bad361ec1666..7a55b5c95971 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -18,8 +18,8 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index fea91313678b..cd46529e9eaa 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -26,8 +26,8 @@ #include <linux/clockchips.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/mach/time.h> #include "common.h" diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index a5b15c4e8def..d9e95e612fcb 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -41,6 +41,7 @@ #include <linux/stat.h> #include <linux/sys_soc.h> #include <linux/termios.h> +#include <linux/sched_clock.h> #include <mach/hardware.h> #include <mach/platform.h> @@ -48,7 +49,6 @@ #include <asm/setup.h> #include <asm/param.h> /* HZ */ #include <asm/mach-types.h> -#include <asm/sched_clock.h> #include <mach/lm.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index d7223b3b81f3..1f6c1fb353ad 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -30,6 +30,7 @@ #include <linux/export.h> #include <linux/gpio.h> #include <linux/cpu.h> +#include <linux/sched_clock.h> #include <mach/udc.h> #include <mach/hardware.h> @@ -38,7 +39,6 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <asm/system_misc.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c index c1cd5a943ab1..e54f87ec2e4a 100644 --- a/arch/arm/mach-lpc32xx/phy3250.c +++ b/arch/arm/mach-lpc32xx/phy3250.c @@ -182,8 +182,8 @@ static void pl08x_put_signal(const struct pl08x_channel_data *cd, int ch) static struct pl08x_platform_data pl08x_pd = { .slave_channels = &pl08x_slave_channels[0], .num_slave_channels = ARRAY_SIZE(pl08x_slave_channels), - .get_signal = pl08x_get_signal, - .put_signal = pl08x_put_signal, + .get_xfer_signal = pl08x_get_signal, + .put_xfer_signal = pl08x_put_signal, .lli_buses = PL08X_AHB1, .mem_buses = PL08X_AHB1, }; diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 86a18b3d252e..7ac41e83cfef 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -28,8 +28,8 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <mach/addr-map.h> #include <mach/regs-timers.h> #include <mach/regs-apbc.h> diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 284313f3e02c..b6418fd5fe0d 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -23,10 +23,10 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/localtimer.h> -#include <asm/sched_clock.h> #include "common.h" diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 726ec23d29c7..80603d2fef77 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -43,9 +43,9 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/io.h> +#include <linux/sched_clock.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <mach/hardware.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 3bdb0fb02028..29ac667b7a8b 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -41,10 +41,10 @@ #include <linux/of_irq.h> #include <linux/platform_device.h> #include <linux/platform_data/dmtimer-omap.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/smp_twd.h> -#include <asm/sched_clock.h> #include "omap_hwmod.h" #include "omap_device.h" diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 8f1ee92aea30..9aa852a8fab9 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -16,11 +16,11 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/clockchips.h> +#include <linux/sched_clock.h> #include <asm/div64.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <mach/regs-ost.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index a59a13a665a6..713c86cd3d64 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -14,9 +14,9 @@ #include <linux/irq.h> #include <linux/timex.h> #include <linux/clockchips.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <mach/hardware.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c index 0227c97797cd..bf3b1fd8cb23 100644 --- a/arch/arm/mach-spear/spear3xx.c +++ b/arch/arm/mach-spear/spear3xx.c @@ -56,8 +56,8 @@ struct pl08x_platform_data pl080_plat_data = { }, .lli_buses = PL08X_AHB1, .mem_buses = PL08X_AHB1, - .get_signal = pl080_get_signal, - .put_signal = pl080_put_signal, + .get_xfer_signal = pl080_get_signal, + .put_xfer_signal = pl080_put_signal, }; /* diff --git a/arch/arm/mach-spear/spear6xx.c b/arch/arm/mach-spear/spear6xx.c index 8b0295a41226..da26fa5b68d7 100644 --- a/arch/arm/mach-spear/spear6xx.c +++ b/arch/arm/mach-spear/spear6xx.c @@ -334,8 +334,8 @@ static struct pl08x_platform_data spear6xx_pl080_plat_data = { }, .lli_buses = PL08X_AHB1, .mem_buses = PL08X_AHB1, - .get_signal = pl080_get_signal, - .put_signal = pl080_put_signal, + .get_xfer_signal = pl080_get_signal, + .put_xfer_signal = pl080_put_signal, .slave_channels = spear600_dma_info, .num_slave_channels = ARRAY_SIZE(spear600_dma_info), }; diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 390ae5feb1d0..b5db207dfd1e 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -21,9 +21,9 @@ #include <linux/delay.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> /* Generic stuff */ -#include <asm/sched_clock.h> #include <asm/mach/map.h> #include <asm/mach/time.h> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7ec02961dfa0..7f9b1798c6cf 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1328,6 +1328,15 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (gfp & GFP_ATOMIC) return __iommu_alloc_atomic(dev, size, handle); + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); if (!pages) return NULL; @@ -1386,16 +1395,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct page **pages = __iommu_get_pages(cpu_addr, attrs); + struct page **pages; size = PAGE_ALIGN(size); - if (!pages) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); return; } - if (__in_atomic_pool(cpu_addr, size)) { - __iommu_free_atomic(dev, cpu_addr, handle, size); + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } @@ -1650,13 +1660,27 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t dma_addr; - int ret, len = PAGE_ALIGN(size + offset); + int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) goto fail; @@ -1921,7 +1945,7 @@ void arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); - mapping = NULL; + dev->archdata.mapping = NULL; set_dma_ops(dev, NULL); pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 837a2d52e9db..29606bd75f3f 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -22,9 +22,9 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/export.h> +#include <linux/sched_clock.h> #include <mach/hardware.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <asm/uaccess.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 5b0b86bb34bb..d9bc98eb2a6b 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -18,9 +18,9 @@ #include <linux/err.h> #include <linux/io.h> #include <linux/clocksource.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <plat/counter-32k.h> diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 5d5ac0f05422..9d2b2ac74938 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -16,7 +16,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> /* * MBus bridge block registers. diff --git a/arch/arm/plat-samsung/samsung-time.c b/arch/arm/plat-samsung/samsung-time.c index f899cbc9b288..2957075ca836 100644 --- a/arch/arm/plat-samsung/samsung-time.c +++ b/arch/arm/plat-samsung/samsung-time.c @@ -15,12 +15,12 @@ #include <linux/clk.h> #include <linux/clockchips.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> #include <asm/smp_twd.h> #include <asm/mach/time.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <asm/sched_clock.h> #include <mach/map.h> #include <plat/devs.h> diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index b33b74c87232..51b109e3b6c3 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -20,8 +20,8 @@ */ #include <linux/kernel.h> #include <linux/io.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <plat/sched_clock.h> static void __iomem *ctr; diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 13609e01f4b7..f71c37edca26 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -314,4 +314,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op); EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 199cb2da7663..d1cf7b7c2200 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -88,6 +88,7 @@ HYPERCALL2(hvm_op); HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); +HYPERCALL1(tmem_op); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 2816c479cd49..531342ec4bcf 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -79,6 +79,7 @@ HYPERCALL2(hvm_op); HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); +HYPERCALL1(tmem_op); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc index ec079cfb7c6a..2a3c860c7525 100644 --- a/arch/metag/Kconfig.soc +++ b/arch/metag/Kconfig.soc @@ -14,6 +14,18 @@ config META21_FPGA help This is a Meta 2.1 FPGA bitstream, just a bare CPU. +config SOC_TZ1090 + bool "Toumaz Xenif TZ1090 SoC (Comet)" + select METAG_LNKGET_AROUND_CACHE + select METAG_META21 + select METAG_SMP_WRITE_REORDERING + select PINCTRL + select PINCTRL_TZ1090 + select PINCTRL_TZ1090_PDC + help + This is a Toumaz Technology Xenif TZ1090 (A.K.A. Comet) SoC containing + a 2-threaded HTP. + endchoice menu "SoC configuration" diff --git a/arch/metag/Makefile b/arch/metag/Makefile index b566116b171b..9739857bdedc 100644 --- a/arch/metag/Makefile +++ b/arch/metag/Makefile @@ -20,7 +20,7 @@ checkflags-$(CONFIG_METAG_META12) += -DMETAC_1_2 checkflags-$(CONFIG_METAG_META21) += -DMETAC_2_1 CHECKFLAGS += -D__metag__ $(checkflags-y) -KBUILD_DEFCONFIG := meta2_defconfig +KBUILD_DEFCONFIG := tz1090_defconfig sflags-$(CONFIG_METAG_META12) += -mmetac=1.2 ifeq ($(CONFIG_METAG_META12),y) diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore index a021da201156..2d6c0c160884 100644 --- a/arch/metag/boot/.gitignore +++ b/arch/metag/boot/.gitignore @@ -1,4 +1,4 @@ vmlinux* uImage* ramdisk.* -*.dtb +*.dtb* diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile index dbd95217733a..72c121879426 100644 --- a/arch/metag/boot/dts/Makefile +++ b/arch/metag/boot/dts/Makefile @@ -1,7 +1,9 @@ dtb-y += skeleton.dtb +dtb-y += tz1090_generic.dtb # Built-in dtb builtindtb-y := skeleton +builtindtb-$(CONFIG_SOC_TZ1090) := tz1090_generic ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"") builtindtb-y := $(patsubst "%",%,$(CONFIG_METAG_BUILTIN_DTB_NAME)) diff --git a/arch/metag/boot/dts/include/dt-bindings b/arch/metag/boot/dts/include/dt-bindings new file mode 120000 index 000000000000..08c00e4972fa --- /dev/null +++ b/arch/metag/boot/dts/include/dt-bindings @@ -0,0 +1 @@ +../../../../../include/dt-bindings
\ No newline at end of file diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts index 7244d1f0d555..7a49aeb365d0 100644 --- a/arch/metag/boot/dts/skeleton.dts +++ b/arch/metag/boot/dts/skeleton.dts @@ -7,4 +7,4 @@ */ /dts-v1/; -/include/ "skeleton.dtsi" +#include "skeleton.dtsi" diff --git a/arch/metag/boot/dts/tz1090.dtsi b/arch/metag/boot/dts/tz1090.dtsi new file mode 100644 index 000000000000..853744652b93 --- /dev/null +++ b/arch/metag/boot/dts/tz1090.dtsi @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "skeleton.dtsi" + +/ { + compatible = "toumaz,tz1090", "img,meta"; + + interrupt-parent = <&intc>; + + intc: interrupt-controller { + compatible = "img,meta-intc"; + interrupt-controller; + #interrupt-cells = <2>; + num-banks = <2>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + pinctrl: pinctrl@02005800 { + #gpio-range-cells = <3>; + compatible = "img,tz1090-pinctrl"; + reg = <0x02005800 0xe4>; + }; + + pdc_pinctrl: pinctrl@02006500 { + #gpio-range-cells = <3>; + compatible = "img,tz1090-pdc-pinctrl"; + reg = <0x02006500 0x100>; + }; + }; +}; diff --git a/arch/metag/boot/dts/tz1090_generic.dts b/arch/metag/boot/dts/tz1090_generic.dts new file mode 100644 index 000000000000..f96090955964 --- /dev/null +++ b/arch/metag/boot/dts/tz1090_generic.dts @@ -0,0 +1,10 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +#include "tz1090.dtsi" diff --git a/arch/metag/configs/tz1090_defconfig b/arch/metag/configs/tz1090_defconfig new file mode 100644 index 000000000000..9f9316a6df27 --- /dev/null +++ b/arch/metag/configs/tz1090_defconfig @@ -0,0 +1,42 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_ELF_CORE is not set +CONFIG_SLAB=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_FLATMEM_MANUAL=y +CONFIG_SOC_TZ1090=y +CONFIG_METAG_HALT_ON_PANIC=y +# CONFIG_METAG_FPU is not set +CONFIG_METAG_DA=y +CONFIG_HZ_100=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=1 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_DA_TTY=y +CONFIG_DA_CONSOLE=y +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +CONFIG_GPIOLIB=y +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h index d04b48cefecc..9f8967f10f8c 100644 --- a/arch/metag/include/asm/bug.h +++ b/arch/metag/include/asm/bug.h @@ -6,7 +6,7 @@ struct pt_regs; extern const char *trap_name(int trapno); -extern void die(const char *str, struct pt_regs *regs, long err, - unsigned long addr) __attribute__ ((noreturn)); +extern void __noreturn die(const char *str, struct pt_regs *regs, long err, + unsigned long addr); #endif diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h index 3e2915a280c7..ded4ab2e1fd0 100644 --- a/arch/metag/include/asm/clock.h +++ b/arch/metag/include/asm/clock.h @@ -19,6 +19,8 @@ * core frequency will be determined like this: * Meta 1: based on loops_per_jiffy. * Meta 2: (EXPAND_TIMER_DIV + 1) MHz. + * If a "core" clock is provided by the device tree, it + * will override this function. */ struct meta_clock_desc { unsigned long (*get_core_freq)(void); @@ -27,6 +29,12 @@ struct meta_clock_desc { extern struct meta_clock_desc _meta_clock; /* + * Perform platform clock initialisation, reading clocks from device tree etc. + * Only accessible during boot. + */ +void init_metag_clocks(void); + +/* * Set up the default clock, ensuring all callbacks are valid - only accessible * during boot. */ diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h index be0c8f3c5a5d..ad6bd0edbc3b 100644 --- a/arch/metag/include/asm/irq.h +++ b/arch/metag/include/asm/irq.h @@ -17,6 +17,7 @@ struct pt_regs; int tbisig_map(unsigned int hw); extern void do_IRQ(int irq, struct pt_regs *regs); +extern void init_IRQ(void); #ifdef CONFIG_METAG_SUSPEND_MEM int traps_save_context(void); diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index 9b029a7911c3..f16477d1f571 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -199,4 +199,6 @@ extern void (*soc_halt)(void); extern void show_trace(struct task_struct *tsk, unsigned long *sp, struct pt_regs *regs); +extern const struct seq_operations cpuinfo_op; + #endif diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c index 954548b1bea8..0a2385fa2a1d 100644 --- a/arch/metag/kernel/cachepart.c +++ b/arch/metag/kernel/cachepart.c @@ -100,22 +100,23 @@ void check_for_cache_aliasing(int thread_id) thread_cache_size = get_thread_cache_size(cache_type, thread_id); if (thread_cache_size < 0) - pr_emerg("Can't read %s cache size", \ + pr_emerg("Can't read %s cache size\n", cache_type ? "DCACHE" : "ICACHE"); else if (thread_cache_size == 0) /* Cache is off. No need to check for aliasing */ continue; if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) { - pr_emerg("Cache aliasing detected in %s on Thread %d", + pr_emerg("Potential cache aliasing detected in %s on Thread %d\n", cache_type ? "DCACHE" : "ICACHE", thread_id); - pr_warn("Total %s size: %u bytes", - cache_type ? "DCACHE" : "ICACHE ", + pr_warn("Total %s size: %u bytes\n", + cache_type ? "DCACHE" : "ICACHE", cache_type ? get_dcache_size() : get_icache_size()); - pr_warn("Thread %s size: %d bytes", + pr_warn("Thread %s size: %d bytes\n", cache_type ? "CACHE" : "ICACHE", thread_cache_size); - pr_warn("Page Size: %lu bytes", PAGE_SIZE); + pr_warn("Page Size: %lu bytes\n", PAGE_SIZE); + panic("Potential cache aliasing detected"); } } } diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c index defc84056f18..6339c9c6d0ab 100644 --- a/arch/metag/kernel/clock.c +++ b/arch/metag/kernel/clock.c @@ -8,8 +8,10 @@ * published by the Free Software Foundation. */ +#include <linux/clk.h> #include <linux/delay.h> #include <linux/io.h> +#include <linux/of.h> #include <asm/param.h> #include <asm/clock.h> @@ -34,8 +36,63 @@ static unsigned long get_core_freq_default(void) #endif } +static struct clk *clk_core; + +/* Clk based get_core_freq callback. */ +static unsigned long get_core_freq_clk(void) +{ + return clk_get_rate(clk_core); +} + +/** + * init_metag_core_clock() - Set up core clock from devicetree. + * + * Checks to see if a "core" clock is provided in the device tree, and overrides + * the get_core_freq callback to use it. + */ +static void __init init_metag_core_clock(void) +{ + /* + * See if a core clock is provided by the devicetree (and + * registered by the init callback above). + */ + struct device_node *node; + node = of_find_compatible_node(NULL, NULL, "img,meta"); + if (!node) { + pr_warn("%s: no compatible img,meta DT node found\n", + __func__); + return; + } + + clk_core = of_clk_get_by_name(node, "core"); + if (IS_ERR(clk_core)) { + pr_warn("%s: no core clock found in DT\n", + __func__); + return; + } + + /* + * Override the core frequency callback to use + * this clk. + */ + _meta_clock.get_core_freq = get_core_freq_clk; +} + +/** + * init_metag_clocks() - Set up clocks from devicetree. + * + * Set up important clocks from device tree. In particular any needed for clock + * sources. + */ +void __init init_metag_clocks(void) +{ + init_metag_core_clock(); + + pr_info("Core clock frequency: %lu Hz\n", get_coreclock()); +} + /** - * setup_meta_clocks() - Set up the Meta clock. + * setup_meta_clocks() - Early set up of the Meta clock. * @desc: Clock descriptor usually provided by machine description * * Ensures all callbacks are valid. diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c index 87707efeb0a3..2a2c9d55187e 100644 --- a/arch/metag/kernel/irq.c +++ b/arch/metag/kernel/irq.c @@ -25,7 +25,7 @@ static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; #endif -struct irq_domain *root_domain; +static struct irq_domain *root_domain; static unsigned int startup_meta_irq(struct irq_data *data) { @@ -279,11 +279,12 @@ static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu) { struct irq_desc *desc = irq_to_desc(irq); struct irq_chip *chip = irq_data_get_irq_chip(data); + unsigned long flags; - raw_spin_lock_irq(&desc->lock); + raw_spin_lock_irqsave(&desc->lock, flags); if (chip->irq_set_affinity) chip->irq_set_affinity(data, cpumask_of(cpu), false); - raw_spin_unlock_irq(&desc->lock); + raw_spin_unlock_irqrestore(&desc->lock, flags); } /* diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c index 50fcbec98cd2..beb377621322 100644 --- a/arch/metag/kernel/kick.c +++ b/arch/metag/kernel/kick.c @@ -26,6 +26,8 @@ * pass it as an argument. */ #include <linux/export.h> +#include <linux/hardirq.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/types.h> @@ -66,6 +68,7 @@ EXPORT_SYMBOL(kick_unregister_func); TBIRES kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) { + struct pt_regs *old_regs; struct kick_irq_handler *kh; struct list_head *lh; int handled = 0; @@ -79,6 +82,9 @@ kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) trace_hardirqs_off(); + old_regs = set_irq_regs((struct pt_regs *)State.Sig.pCtx); + irq_enter(); + /* * There is no need to disable interrupts here because we * can't nest KICK interrupts in a KICK interrupt handler. @@ -97,5 +103,8 @@ kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) WARN_ON(!handled); + irq_exit(); + set_irq_regs(old_regs); + return tail_end(ret); } diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c index ec872ef14eb1..215c94ad63ac 100644 --- a/arch/metag/kernel/metag_ksyms.c +++ b/arch/metag/kernel/metag_ksyms.c @@ -1,5 +1,7 @@ #include <linux/export.h> +#include <linux/types.h> +#include <asm/checksum.h> #include <asm/div64.h> #include <asm/ftrace.h> #include <asm/page.h> @@ -15,6 +17,9 @@ EXPORT_SYMBOL(max_pfn); EXPORT_SYMBOL(min_low_pfn); #endif +/* Network checksum functions */ +EXPORT_SYMBOL(csum_partial); + /* TBI symbols */ EXPORT_SYMBOL(__TBI); EXPORT_SYMBOL(__TBIFindSeg); diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c index 4f5726f1a55b..c396cd0b425f 100644 --- a/arch/metag/kernel/setup.c +++ b/arch/metag/kernel/setup.c @@ -20,6 +20,7 @@ #include <linux/memblock.h> #include <linux/mm.h> #include <linux/of_fdt.h> +#include <linux/of_platform.h> #include <linux/pfn.h> #include <linux/root_dev.h> #include <linux/sched.h> @@ -424,6 +425,9 @@ static int __init customize_machine(void) /* customizes platform devices, or adds new ones */ if (machine_desc->init_machine) machine_desc->init_machine(); + else + of_platform_populate(NULL, of_default_bus_match_table, NULL, + NULL); return 0; } arch_initcall(customize_machine); @@ -587,20 +591,20 @@ PTBI pTBI_get(unsigned int cpu) EXPORT_SYMBOL(pTBI_get); #if defined(CONFIG_METAG_DSP) && defined(CONFIG_METAG_FPU) -char capabilites[] = "dsp fpu"; +static char capabilities[] = "dsp fpu"; #elif defined(CONFIG_METAG_DSP) -char capabilites[] = "dsp"; +static char capabilities[] = "dsp"; #elif defined(CONFIG_METAG_FPU) -char capabilites[] = "fpu"; +static char capabilities[] = "fpu"; #else -char capabilites[] = ""; +static char capabilities[] = ""; #endif static struct ctl_table caps_kern_table[] = { { .procname = "capabilities", - .data = capabilites, - .maxlen = sizeof(capabilites), + .data = capabilities, + .maxlen = sizeof(capabilities), .mode = 0444, .proc_handler = proc_dostring, }, diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index f443ec9a7cbe..e413875cf6d2 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include <linux/atomic.h> +#include <linux/completion.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/spinlock.h> @@ -62,6 +63,8 @@ static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { static DEFINE_SPINLOCK(boot_lock); +static DECLARE_COMPLETION(cpu_running); + /* * "thread" is assumed to be a valid Meta hardware thread ID. */ @@ -235,20 +238,12 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) */ ret = boot_secondary(thread, idle); if (ret == 0) { - unsigned long timeout; - /* * CPU was successfully started, wait for it * to come online or time out. */ - timeout = jiffies + HZ; - while (time_before(jiffies, timeout)) { - if (cpu_online(cpu)) - break; - - udelay(10); - barrier(); - } + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000)); if (!cpu_online(cpu)) ret = -EIO; @@ -276,7 +271,6 @@ static DECLARE_COMPLETION(cpu_killed); int __cpuexit __cpu_disable(void) { unsigned int cpu = smp_processor_id(); - struct task_struct *p; /* * Take this CPU offline. Once we clear this, we can't return, @@ -296,12 +290,7 @@ int __cpuexit __cpu_disable(void) flush_cache_all(); local_flush_tlb_all(); - read_lock(&tasklist_lock); - for_each_process(p) { - if (p->mm) - cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); - } - read_unlock(&tasklist_lock); + clear_tasks_mm_cpumask(cpu); return 0; } @@ -385,12 +374,7 @@ asmlinkage void secondary_start_kernel(void) setup_priv(); - /* - * Enable local interrupts. - */ - tbi_startup_interrupt(TBID_SIGNUM_TRT); notify_cpu_starting(cpu); - local_irq_enable(); pr_info("CPU%u (thread %u): Booted secondary processor\n", cpu, cpu_2_hwthread_id[cpu]); @@ -402,12 +386,13 @@ asmlinkage void secondary_start_kernel(void) * OK, now it's safe to let the boot CPU continue */ set_cpu_online(cpu, true); + complete(&cpu_running); /* - * Check for cache aliasing. - * Preemption is disabled + * Enable local interrupts. */ - check_for_cache_aliasing(cpu); + tbi_startup_interrupt(TBID_SIGNUM_TRT); + local_irq_enable(); /* * OK, it's off to the idle thread for us diff --git a/arch/metag/kernel/time.c b/arch/metag/kernel/time.c index 17dc10733b2f..f1c8c53dace7 100644 --- a/arch/metag/kernel/time.c +++ b/arch/metag/kernel/time.c @@ -5,11 +5,21 @@ * */ -#include <linux/init.h> - #include <clocksource/metag_generic.h> +#include <linux/clk-provider.h> +#include <linux/init.h> +#include <asm/clock.h> void __init time_init(void) { +#ifdef CONFIG_COMMON_CLK + /* Init clocks from device tree */ + of_clk_init(NULL); +#endif + + /* Init meta clocks, particularly the core clock */ + init_metag_clocks(); + + /* Set up the timer clock sources */ metag_generic_timer_init(); } diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 2ceeaae5b199..c00ade0228ef 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -33,6 +33,7 @@ #include <asm/siginfo.h> #include <asm/traps.h> #include <asm/hwthread.h> +#include <asm/setup.h> #include <asm/switch.h> #include <asm/user_gateway.h> #include <asm/syscall.h> @@ -87,8 +88,8 @@ const char *trap_name(int trapno) static DEFINE_SPINLOCK(die_lock); -void die(const char *str, struct pt_regs *regs, long err, - unsigned long addr) +void __noreturn die(const char *str, struct pt_regs *regs, + long err, unsigned long addr) { static int die_counter; diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c index 44d2e1913560..5d6a98a05e9d 100644 --- a/arch/metag/lib/checksum.c +++ b/arch/metag/lib/checksum.c @@ -124,7 +124,6 @@ __wsum csum_partial(const void *buff, int len, __wsum wsum) result += 1; return (__force __wsum)result; } -EXPORT_SYMBOL(csum_partial); /* * this routine is used for miscellaneous IP-like checksums, mainly diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c index b5d3b2e7c160..a62285284ab8 100644 --- a/arch/metag/mm/cache.c +++ b/arch/metag/mm/cache.c @@ -45,7 +45,7 @@ static volatile u32 lnkget_testdata[16] __initdata __aligned(64); #define LNKGET_CONSTANT 0xdeadbeef -void __init metag_lnkget_probe(void) +static void __init metag_lnkget_probe(void) { int temp; long flags; diff --git a/arch/mips/include/asm/mach-jz4740/dma.h b/arch/mips/include/asm/mach-jz4740/dma.h index 98b4e7c0dbae..509cd5828044 100644 --- a/arch/mips/include/asm/mach-jz4740/dma.h +++ b/arch/mips/include/asm/mach-jz4740/dma.h @@ -16,8 +16,6 @@ #ifndef __ASM_MACH_JZ4740_DMA_H__ #define __ASM_MACH_JZ4740_DMA_H__ -struct jz4740_dma_chan; - enum jz4740_dma_request_type { JZ4740_DMA_TYPE_AUTO_REQUEST = 8, JZ4740_DMA_TYPE_UART_TRANSMIT = 20, @@ -33,58 +31,4 @@ enum jz4740_dma_request_type { JZ4740_DMA_TYPE_SLCD = 30, }; -enum jz4740_dma_width { - JZ4740_DMA_WIDTH_32BIT = 0, - JZ4740_DMA_WIDTH_8BIT = 1, - JZ4740_DMA_WIDTH_16BIT = 2, -}; - -enum jz4740_dma_transfer_size { - JZ4740_DMA_TRANSFER_SIZE_4BYTE = 0, - JZ4740_DMA_TRANSFER_SIZE_1BYTE = 1, - JZ4740_DMA_TRANSFER_SIZE_2BYTE = 2, - JZ4740_DMA_TRANSFER_SIZE_16BYTE = 3, - JZ4740_DMA_TRANSFER_SIZE_32BYTE = 4, -}; - -enum jz4740_dma_flags { - JZ4740_DMA_SRC_AUTOINC = 0x2, - JZ4740_DMA_DST_AUTOINC = 0x1, -}; - -enum jz4740_dma_mode { - JZ4740_DMA_MODE_SINGLE = 0, - JZ4740_DMA_MODE_BLOCK = 1, -}; - -struct jz4740_dma_config { - enum jz4740_dma_width src_width; - enum jz4740_dma_width dst_width; - enum jz4740_dma_transfer_size transfer_size; - enum jz4740_dma_request_type request_type; - enum jz4740_dma_flags flags; - enum jz4740_dma_mode mode; -}; - -typedef void (*jz4740_dma_complete_callback_t)(struct jz4740_dma_chan *, int, void *); - -struct jz4740_dma_chan *jz4740_dma_request(void *dev, const char *name); -void jz4740_dma_free(struct jz4740_dma_chan *dma); - -void jz4740_dma_configure(struct jz4740_dma_chan *dma, - const struct jz4740_dma_config *config); - - -void jz4740_dma_enable(struct jz4740_dma_chan *dma); -void jz4740_dma_disable(struct jz4740_dma_chan *dma); - -void jz4740_dma_set_src_addr(struct jz4740_dma_chan *dma, dma_addr_t src); -void jz4740_dma_set_dst_addr(struct jz4740_dma_chan *dma, dma_addr_t dst); -void jz4740_dma_set_transfer_count(struct jz4740_dma_chan *dma, uint32_t count); - -uint32_t jz4740_dma_get_residue(const struct jz4740_dma_chan *dma); - -void jz4740_dma_set_complete_cb(struct jz4740_dma_chan *dma, - jz4740_dma_complete_callback_t cb); - #endif /* __ASM_JZ4740_DMA_H__ */ diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h index 72cfebdb5a47..05988c2d6565 100644 --- a/arch/mips/include/asm/mach-jz4740/platform.h +++ b/arch/mips/include/asm/mach-jz4740/platform.h @@ -32,6 +32,7 @@ extern struct platform_device jz4740_codec_device; extern struct platform_device jz4740_adc_device; extern struct platform_device jz4740_wdt_device; extern struct platform_device jz4740_pwm_device; +extern struct platform_device jz4740_dma_device; void jz4740_serial_device_register(void); diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile index 63bad0e491d0..28e5535dfa9e 100644 --- a/arch/mips/jz4740/Makefile +++ b/arch/mips/jz4740/Makefile @@ -4,7 +4,7 @@ # Object file lists. -obj-y += prom.o irq.o time.o reset.o setup.o dma.o \ +obj-y += prom.o irq.o time.o reset.o setup.o \ gpio.o clock.o platform.o timer.o serial.o obj-$(CONFIG_DEBUG_FS) += clock-debugfs.o diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index be2b3deeef1d..8a5ec0eedeb0 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -438,6 +438,7 @@ static struct platform_device *jz_platform_devices[] __initdata = { &jz4740_rtc_device, &jz4740_adc_device, &jz4740_pwm_device, + &jz4740_dma_device, &qi_lb60_gpio_keys, &qi_lb60_pwm_beeper, &qi_lb60_charger_device, diff --git a/arch/mips/jz4740/clock.c b/arch/mips/jz4740/clock.c index 484d38a0864f..1b5f55426cad 100644 --- a/arch/mips/jz4740/clock.c +++ b/arch/mips/jz4740/clock.c @@ -687,7 +687,7 @@ static struct clk jz4740_clock_simple_clks[] = { [3] = { .name = "dma", .parent = &jz_clk_high_speed_peripheral.clk, - .gate_bit = JZ_CLOCK_GATE_UART0, + .gate_bit = JZ_CLOCK_GATE_DMAC, .ops = &jz_clk_simple_ops, }, [4] = { diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c deleted file mode 100644 index 317ec6fffb12..000000000000 --- a/arch/mips/jz4740/dma.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 SoC DMA support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> - -#include <linux/dma-mapping.h> -#include <asm/mach-jz4740/dma.h> -#include <asm/mach-jz4740/base.h> - -#define JZ_REG_DMA_SRC_ADDR(x) (0x00 + (x) * 0x20) -#define JZ_REG_DMA_DST_ADDR(x) (0x04 + (x) * 0x20) -#define JZ_REG_DMA_TRANSFER_COUNT(x) (0x08 + (x) * 0x20) -#define JZ_REG_DMA_REQ_TYPE(x) (0x0C + (x) * 0x20) -#define JZ_REG_DMA_STATUS_CTRL(x) (0x10 + (x) * 0x20) -#define JZ_REG_DMA_CMD(x) (0x14 + (x) * 0x20) -#define JZ_REG_DMA_DESC_ADDR(x) (0x18 + (x) * 0x20) - -#define JZ_REG_DMA_CTRL 0x300 -#define JZ_REG_DMA_IRQ 0x304 -#define JZ_REG_DMA_DOORBELL 0x308 -#define JZ_REG_DMA_DOORBELL_SET 0x30C - -#define JZ_DMA_STATUS_CTRL_NO_DESC BIT(31) -#define JZ_DMA_STATUS_CTRL_DESC_INV BIT(6) -#define JZ_DMA_STATUS_CTRL_ADDR_ERR BIT(4) -#define JZ_DMA_STATUS_CTRL_TRANSFER_DONE BIT(3) -#define JZ_DMA_STATUS_CTRL_HALT BIT(2) -#define JZ_DMA_STATUS_CTRL_COUNT_TERMINATE BIT(1) -#define JZ_DMA_STATUS_CTRL_ENABLE BIT(0) - -#define JZ_DMA_CMD_SRC_INC BIT(23) -#define JZ_DMA_CMD_DST_INC BIT(22) -#define JZ_DMA_CMD_RDIL_MASK (0xf << 16) -#define JZ_DMA_CMD_SRC_WIDTH_MASK (0x3 << 14) -#define JZ_DMA_CMD_DST_WIDTH_MASK (0x3 << 12) -#define JZ_DMA_CMD_INTERVAL_LENGTH_MASK (0x7 << 8) -#define JZ_DMA_CMD_BLOCK_MODE BIT(7) -#define JZ_DMA_CMD_DESC_VALID BIT(4) -#define JZ_DMA_CMD_DESC_VALID_MODE BIT(3) -#define JZ_DMA_CMD_VALID_IRQ_ENABLE BIT(2) -#define JZ_DMA_CMD_TRANSFER_IRQ_ENABLE BIT(1) -#define JZ_DMA_CMD_LINK_ENABLE BIT(0) - -#define JZ_DMA_CMD_FLAGS_OFFSET 22 -#define JZ_DMA_CMD_RDIL_OFFSET 16 -#define JZ_DMA_CMD_SRC_WIDTH_OFFSET 14 -#define JZ_DMA_CMD_DST_WIDTH_OFFSET 12 -#define JZ_DMA_CMD_TRANSFER_SIZE_OFFSET 8 -#define JZ_DMA_CMD_MODE_OFFSET 7 - -#define JZ_DMA_CTRL_PRIORITY_MASK (0x3 << 8) -#define JZ_DMA_CTRL_HALT BIT(3) -#define JZ_DMA_CTRL_ADDRESS_ERROR BIT(2) -#define JZ_DMA_CTRL_ENABLE BIT(0) - - -static void __iomem *jz4740_dma_base; -static spinlock_t jz4740_dma_lock; - -static inline uint32_t jz4740_dma_read(size_t reg) -{ - return readl(jz4740_dma_base + reg); -} - -static inline void jz4740_dma_write(size_t reg, uint32_t val) -{ - writel(val, jz4740_dma_base + reg); -} - -static inline void jz4740_dma_write_mask(size_t reg, uint32_t val, uint32_t mask) -{ - uint32_t val2; - val2 = jz4740_dma_read(reg); - val2 &= ~mask; - val2 |= val; - jz4740_dma_write(reg, val2); -} - -struct jz4740_dma_chan { - unsigned int id; - void *dev; - const char *name; - - enum jz4740_dma_flags flags; - uint32_t transfer_shift; - - jz4740_dma_complete_callback_t complete_cb; - - unsigned used:1; -}; - -#define JZ4740_DMA_CHANNEL(_id) { .id = _id } - -struct jz4740_dma_chan jz4740_dma_channels[] = { - JZ4740_DMA_CHANNEL(0), - JZ4740_DMA_CHANNEL(1), - JZ4740_DMA_CHANNEL(2), - JZ4740_DMA_CHANNEL(3), - JZ4740_DMA_CHANNEL(4), - JZ4740_DMA_CHANNEL(5), -}; - -struct jz4740_dma_chan *jz4740_dma_request(void *dev, const char *name) -{ - unsigned int i; - struct jz4740_dma_chan *dma = NULL; - - spin_lock(&jz4740_dma_lock); - - for (i = 0; i < ARRAY_SIZE(jz4740_dma_channels); ++i) { - if (!jz4740_dma_channels[i].used) { - dma = &jz4740_dma_channels[i]; - dma->used = 1; - break; - } - } - - spin_unlock(&jz4740_dma_lock); - - if (!dma) - return NULL; - - dma->dev = dev; - dma->name = name; - - return dma; -} -EXPORT_SYMBOL_GPL(jz4740_dma_request); - -void jz4740_dma_configure(struct jz4740_dma_chan *dma, - const struct jz4740_dma_config *config) -{ - uint32_t cmd; - - switch (config->transfer_size) { - case JZ4740_DMA_TRANSFER_SIZE_2BYTE: - dma->transfer_shift = 1; - break; - case JZ4740_DMA_TRANSFER_SIZE_4BYTE: - dma->transfer_shift = 2; - break; - case JZ4740_DMA_TRANSFER_SIZE_16BYTE: - dma->transfer_shift = 4; - break; - case JZ4740_DMA_TRANSFER_SIZE_32BYTE: - dma->transfer_shift = 5; - break; - default: - dma->transfer_shift = 0; - break; - } - - cmd = config->flags << JZ_DMA_CMD_FLAGS_OFFSET; - cmd |= config->src_width << JZ_DMA_CMD_SRC_WIDTH_OFFSET; - cmd |= config->dst_width << JZ_DMA_CMD_DST_WIDTH_OFFSET; - cmd |= config->transfer_size << JZ_DMA_CMD_TRANSFER_SIZE_OFFSET; - cmd |= config->mode << JZ_DMA_CMD_MODE_OFFSET; - cmd |= JZ_DMA_CMD_TRANSFER_IRQ_ENABLE; - - jz4740_dma_write(JZ_REG_DMA_CMD(dma->id), cmd); - jz4740_dma_write(JZ_REG_DMA_STATUS_CTRL(dma->id), 0); - jz4740_dma_write(JZ_REG_DMA_REQ_TYPE(dma->id), config->request_type); -} -EXPORT_SYMBOL_GPL(jz4740_dma_configure); - -void jz4740_dma_set_src_addr(struct jz4740_dma_chan *dma, dma_addr_t src) -{ - jz4740_dma_write(JZ_REG_DMA_SRC_ADDR(dma->id), src); -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_src_addr); - -void jz4740_dma_set_dst_addr(struct jz4740_dma_chan *dma, dma_addr_t dst) -{ - jz4740_dma_write(JZ_REG_DMA_DST_ADDR(dma->id), dst); -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_dst_addr); - -void jz4740_dma_set_transfer_count(struct jz4740_dma_chan *dma, uint32_t count) -{ - count >>= dma->transfer_shift; - jz4740_dma_write(JZ_REG_DMA_TRANSFER_COUNT(dma->id), count); -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_transfer_count); - -void jz4740_dma_set_complete_cb(struct jz4740_dma_chan *dma, - jz4740_dma_complete_callback_t cb) -{ - dma->complete_cb = cb; -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_complete_cb); - -void jz4740_dma_free(struct jz4740_dma_chan *dma) -{ - dma->dev = NULL; - dma->complete_cb = NULL; - dma->used = 0; -} -EXPORT_SYMBOL_GPL(jz4740_dma_free); - -void jz4740_dma_enable(struct jz4740_dma_chan *dma) -{ - jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), - JZ_DMA_STATUS_CTRL_NO_DESC | JZ_DMA_STATUS_CTRL_ENABLE, - JZ_DMA_STATUS_CTRL_HALT | JZ_DMA_STATUS_CTRL_NO_DESC | - JZ_DMA_STATUS_CTRL_ENABLE); - - jz4740_dma_write_mask(JZ_REG_DMA_CTRL, - JZ_DMA_CTRL_ENABLE, - JZ_DMA_CTRL_HALT | JZ_DMA_CTRL_ENABLE); -} -EXPORT_SYMBOL_GPL(jz4740_dma_enable); - -void jz4740_dma_disable(struct jz4740_dma_chan *dma) -{ - jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, - JZ_DMA_STATUS_CTRL_ENABLE); -} -EXPORT_SYMBOL_GPL(jz4740_dma_disable); - -uint32_t jz4740_dma_get_residue(const struct jz4740_dma_chan *dma) -{ - uint32_t residue; - residue = jz4740_dma_read(JZ_REG_DMA_TRANSFER_COUNT(dma->id)); - return residue << dma->transfer_shift; -} -EXPORT_SYMBOL_GPL(jz4740_dma_get_residue); - -static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma) -{ - (void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); - - jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, - JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE); - - if (dma->complete_cb) - dma->complete_cb(dma, 0, dma->dev); -} - -static irqreturn_t jz4740_dma_irq(int irq, void *dev_id) -{ - uint32_t irq_status; - unsigned int i; - - irq_status = readl(jz4740_dma_base + JZ_REG_DMA_IRQ); - - for (i = 0; i < 6; ++i) { - if (irq_status & (1 << i)) - jz4740_dma_chan_irq(&jz4740_dma_channels[i]); - } - - return IRQ_HANDLED; -} - -static int jz4740_dma_init(void) -{ - unsigned int ret; - - jz4740_dma_base = ioremap(JZ4740_DMAC_BASE_ADDR, 0x400); - - if (!jz4740_dma_base) - return -EBUSY; - - spin_lock_init(&jz4740_dma_lock); - - ret = request_irq(JZ4740_IRQ_DMAC, jz4740_dma_irq, 0, "DMA", NULL); - - if (ret) - printk(KERN_ERR "JZ4740 DMA: Failed to request irq: %d\n", ret); - - return ret; -} -arch_initcall(jz4740_dma_init); diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c index e9348fd26a35..df65677f3d0b 100644 --- a/arch/mips/jz4740/platform.c +++ b/arch/mips/jz4740/platform.c @@ -329,3 +329,24 @@ struct platform_device jz4740_pwm_device = { .name = "jz4740-pwm", .id = -1, }; + +/* DMA */ +static struct resource jz4740_dma_resources[] = { + { + .start = JZ4740_DMAC_BASE_ADDR, + .end = JZ4740_DMAC_BASE_ADDR + 0x400 - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = JZ4740_IRQ_DMAC, + .end = JZ4740_IRQ_DMAC, + .flags = IORESOURCE_IRQ, + }, +}; + +struct platform_device jz4740_dma_device = { + .name = "jz4740-dma", + .id = -1, + .num_resources = ARRAY_SIZE(jz4740_dma_resources), + .resource = jz4740_dma_resources, +}; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 8c6dc42ecf65..9e5dfbcc00af 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void) ppc_md.get_irq = beatic_get_irq; /* Allocate an irq host */ - beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); + beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL); BUG_ON(beatic_host == NULL); irq_set_default_host(beatic_host); } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 49c9f9501c21..5cbd4d67d5c4 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void) { int rc = -ENOMEM; - psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL); + psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL); if (psurge_host) psurge_secondary_virq = irq_create_direct_mapping(psurge_host); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a3a0ed80f17c..7d6ba9db1be9 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -3,8 +3,6 @@ # avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o @@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -42,10 +41,8 @@ endif # These modules require assembler to support AVX2. ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o endif aes-i586-y := aes-i586-asm_32.o aes_glue.o @@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes) endif ifeq ($(avx2_supported),yes) - blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o - twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o @@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o +crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S deleted file mode 100644 index 784452e0d05d..000000000000 --- a/arch/x86/crypto/blowfish-avx2-asm_64.S +++ /dev/null @@ -1,449 +0,0 @@ -/* - * x86_64/AVX2 assembler optimized version of Blowfish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/linkage.h> - -.file "blowfish-avx2-asm_64.S" - -.data -.align 32 - -.Lprefetch_mask: -.long 0*64 -.long 1*64 -.long 2*64 -.long 3*64 -.long 4*64 -.long 5*64 -.long 6*64 -.long 7*64 - -.Lbswap32_mask: -.long 0x00010203 -.long 0x04050607 -.long 0x08090a0b -.long 0x0c0d0e0f - -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.Lbswap_iv_mask: - .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 - -.text -/* structure of crypto context */ -#define p 0 -#define s0 ((16 + 2) * 4) -#define s1 ((16 + 2 + (1 * 256)) * 4) -#define s2 ((16 + 2 + (2 * 256)) * 4) -#define s3 ((16 + 2 + (3 * 256)) * 4) - -/* register macros */ -#define CTX %rdi -#define RIO %rdx - -#define RS0 %rax -#define RS1 %r8 -#define RS2 %r9 -#define RS3 %r10 - -#define RLOOP %r11 -#define RLOOPd %r11d - -#define RXr0 %ymm8 -#define RXr1 %ymm9 -#define RXr2 %ymm10 -#define RXr3 %ymm11 -#define RXl0 %ymm12 -#define RXl1 %ymm13 -#define RXl2 %ymm14 -#define RXl3 %ymm15 - -/* temp regs */ -#define RT0 %ymm0 -#define RT0x %xmm0 -#define RT1 %ymm1 -#define RT1x %xmm1 -#define RIDX0 %ymm2 -#define RIDX1 %ymm3 -#define RIDX1x %xmm3 -#define RIDX2 %ymm4 -#define RIDX3 %ymm5 - -/* vpgatherdd mask and '-1' */ -#define RNOT %ymm6 - -/* byte mask, (-1 >> 24) */ -#define RBYTE %ymm7 - -/*********************************************************************** - * 32-way AVX2 blowfish - ***********************************************************************/ -#define F(xl, xr) \ - vpsrld $24, xl, RIDX0; \ - vpsrld $16, xl, RIDX1; \ - vpsrld $8, xl, RIDX2; \ - vpand RBYTE, RIDX1, RIDX1; \ - vpand RBYTE, RIDX2, RIDX2; \ - vpand RBYTE, xl, RIDX3; \ - \ - vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpcmpeqd RIDX0, RIDX0, RIDX0; \ - \ - vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \ - vpcmpeqd RIDX1, RIDX1, RIDX1; \ - vpaddd RT0, RT1, RT0; \ - \ - vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \ - vpxor RT0, RT1, RT0; \ - \ - vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpaddd RT0, RT1, RT0; \ - \ - vpxor RT0, xr, xr; - -#define add_roundkey(xl, nmem) \ - vpbroadcastd nmem, RT0; \ - vpxor RT0, xl ## 0, xl ## 0; \ - vpxor RT0, xl ## 1, xl ## 1; \ - vpxor RT0, xl ## 2, xl ## 2; \ - vpxor RT0, xl ## 3, xl ## 3; - -#define round_enc() \ - add_roundkey(RXr, p(CTX,RLOOP,4)); \ - F(RXl0, RXr0); \ - F(RXl1, RXr1); \ - F(RXl2, RXr2); \ - F(RXl3, RXr3); \ - \ - add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ - F(RXr0, RXl0); \ - F(RXr1, RXl1); \ - F(RXr2, RXl2); \ - F(RXr3, RXl3); - -#define round_dec() \ - add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \ - F(RXl0, RXr0); \ - F(RXl1, RXr1); \ - F(RXl2, RXr2); \ - F(RXl3, RXr3); \ - \ - add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ - F(RXr0, RXl0); \ - F(RXr1, RXl1); \ - F(RXr2, RXl2); \ - F(RXr3, RXl3); - -#define init_round_constants() \ - vpcmpeqd RNOT, RNOT, RNOT; \ - leaq s0(CTX), RS0; \ - leaq s1(CTX), RS1; \ - leaq s2(CTX), RS2; \ - leaq s3(CTX), RS3; \ - vpsrld $24, RNOT, RBYTE; - -#define transpose_2x2(x0, x1, t0) \ - vpunpckldq x0, x1, t0; \ - vpunpckhdq x0, x1, x1; \ - \ - vpunpcklqdq t0, x1, x0; \ - vpunpckhqdq t0, x1, x1; - -#define read_block(xl, xr) \ - vbroadcasti128 .Lbswap32_mask, RT1; \ - \ - vpshufb RT1, xl ## 0, xl ## 0; \ - vpshufb RT1, xr ## 0, xr ## 0; \ - vpshufb RT1, xl ## 1, xl ## 1; \ - vpshufb RT1, xr ## 1, xr ## 1; \ - vpshufb RT1, xl ## 2, xl ## 2; \ - vpshufb RT1, xr ## 2, xr ## 2; \ - vpshufb RT1, xl ## 3, xl ## 3; \ - vpshufb RT1, xr ## 3, xr ## 3; \ - \ - transpose_2x2(xl ## 0, xr ## 0, RT0); \ - transpose_2x2(xl ## 1, xr ## 1, RT0); \ - transpose_2x2(xl ## 2, xr ## 2, RT0); \ - transpose_2x2(xl ## 3, xr ## 3, RT0); - -#define write_block(xl, xr) \ - vbroadcasti128 .Lbswap32_mask, RT1; \ - \ - transpose_2x2(xl ## 0, xr ## 0, RT0); \ - transpose_2x2(xl ## 1, xr ## 1, RT0); \ - transpose_2x2(xl ## 2, xr ## 2, RT0); \ - transpose_2x2(xl ## 3, xr ## 3, RT0); \ - \ - vpshufb RT1, xl ## 0, xl ## 0; \ - vpshufb RT1, xr ## 0, xr ## 0; \ - vpshufb RT1, xl ## 1, xl ## 1; \ - vpshufb RT1, xr ## 1, xr ## 1; \ - vpshufb RT1, xl ## 2, xl ## 2; \ - vpshufb RT1, xr ## 2, xr ## 2; \ - vpshufb RT1, xl ## 3, xl ## 3; \ - vpshufb RT1, xr ## 3, xr ## 3; - -.align 8 -__blowfish_enc_blk32: - /* input: - * %rdi: ctx, CTX - * RXl0..4, RXr0..4: plaintext - * output: - * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped) - */ - init_round_constants(); - - read_block(RXl, RXr); - - movl $1, RLOOPd; - add_roundkey(RXl, p+4*(0)(CTX)); - -.align 4 -.L__enc_loop: - round_enc(); - - leal 2(RLOOPd), RLOOPd; - cmpl $17, RLOOPd; - jne .L__enc_loop; - - add_roundkey(RXr, p+4*(17)(CTX)); - - write_block(RXl, RXr); - - ret; -ENDPROC(__blowfish_enc_blk32) - -.align 8 -__blowfish_dec_blk32: - /* input: - * %rdi: ctx, CTX - * RXl0..4, RXr0..4: ciphertext - * output: - * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped) - */ - init_round_constants(); - - read_block(RXl, RXr); - - movl $14, RLOOPd; - add_roundkey(RXl, p+4*(17)(CTX)); - -.align 4 -.L__dec_loop: - round_dec(); - - addl $-2, RLOOPd; - jns .L__dec_loop; - - add_roundkey(RXr, p+4*(0)(CTX)); - - write_block(RXl, RXr); - - ret; -ENDPROC(__blowfish_dec_blk32) - -ENTRY(blowfish_ecb_enc_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - - vmovdqu 0*32(%rdx), RXl0; - vmovdqu 1*32(%rdx), RXr0; - vmovdqu 2*32(%rdx), RXl1; - vmovdqu 3*32(%rdx), RXr1; - vmovdqu 4*32(%rdx), RXl2; - vmovdqu 5*32(%rdx), RXr2; - vmovdqu 6*32(%rdx), RXl3; - vmovdqu 7*32(%rdx), RXr3; - - call __blowfish_enc_blk32; - - vmovdqu RXr0, 0*32(%rsi); - vmovdqu RXl0, 1*32(%rsi); - vmovdqu RXr1, 2*32(%rsi); - vmovdqu RXl1, 3*32(%rsi); - vmovdqu RXr2, 4*32(%rsi); - vmovdqu RXl2, 5*32(%rsi); - vmovdqu RXr3, 6*32(%rsi); - vmovdqu RXl3, 7*32(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_ecb_enc_32way) - -ENTRY(blowfish_ecb_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - - vmovdqu 0*32(%rdx), RXl0; - vmovdqu 1*32(%rdx), RXr0; - vmovdqu 2*32(%rdx), RXl1; - vmovdqu 3*32(%rdx), RXr1; - vmovdqu 4*32(%rdx), RXl2; - vmovdqu 5*32(%rdx), RXr2; - vmovdqu 6*32(%rdx), RXl3; - vmovdqu 7*32(%rdx), RXr3; - - call __blowfish_dec_blk32; - - vmovdqu RXr0, 0*32(%rsi); - vmovdqu RXl0, 1*32(%rsi); - vmovdqu RXr1, 2*32(%rsi); - vmovdqu RXl1, 3*32(%rsi); - vmovdqu RXr2, 4*32(%rsi); - vmovdqu RXl2, 5*32(%rsi); - vmovdqu RXr3, 6*32(%rsi); - vmovdqu RXl3, 7*32(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_ecb_dec_32way) - -ENTRY(blowfish_cbc_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - - vmovdqu 0*32(%rdx), RXl0; - vmovdqu 1*32(%rdx), RXr0; - vmovdqu 2*32(%rdx), RXl1; - vmovdqu 3*32(%rdx), RXr1; - vmovdqu 4*32(%rdx), RXl2; - vmovdqu 5*32(%rdx), RXr2; - vmovdqu 6*32(%rdx), RXl3; - vmovdqu 7*32(%rdx), RXr3; - - call __blowfish_dec_blk32; - - /* xor with src */ - vmovq (%rdx), RT0x; - vpshufd $0x4f, RT0x, RT0x; - vinserti128 $1, 8(%rdx), RT0, RT0; - vpxor RT0, RXr0, RXr0; - vpxor 0*32+24(%rdx), RXl0, RXl0; - vpxor 1*32+24(%rdx), RXr1, RXr1; - vpxor 2*32+24(%rdx), RXl1, RXl1; - vpxor 3*32+24(%rdx), RXr2, RXr2; - vpxor 4*32+24(%rdx), RXl2, RXl2; - vpxor 5*32+24(%rdx), RXr3, RXr3; - vpxor 6*32+24(%rdx), RXl3, RXl3; - - vmovdqu RXr0, (0*32)(%rsi); - vmovdqu RXl0, (1*32)(%rsi); - vmovdqu RXr1, (2*32)(%rsi); - vmovdqu RXl1, (3*32)(%rsi); - vmovdqu RXr2, (4*32)(%rsi); - vmovdqu RXl2, (5*32)(%rsi); - vmovdqu RXr3, (6*32)(%rsi); - vmovdqu RXl3, (7*32)(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_cbc_dec_32way) - -ENTRY(blowfish_ctr_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (big endian, 64bit) - */ - - vzeroupper; - - vpcmpeqd RT0, RT0, RT0; - vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */ - - vpcmpeqd RT1x, RT1x, RT1x; - vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */ - vpxor RIDX0, RIDX0, RIDX0; - vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */ - - vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */ - - vpcmpeqd RT1, RT1, RT1; - vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */ - vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */ - - vbroadcasti128 .Lbswap_iv_mask, RIDX0; - vbroadcasti128 .Lbswap128_mask, RIDX1; - - /* load IV and byteswap */ - vmovq (%rcx), RT1x; - vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */ - vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */ - - /* construct IVs */ - vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */ - vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */ - vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */ - vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */ - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXl1; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXr1; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXl2; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXr2; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXl3; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXr3; - - /* store last IV */ - vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */ - vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */ - vmovq RT1x, (%rcx); - - call __blowfish_enc_blk32; - - /* dst = src ^ iv */ - vpxor 0*32(%rdx), RXr0, RXr0; - vpxor 1*32(%rdx), RXl0, RXl0; - vpxor 2*32(%rdx), RXr1, RXr1; - vpxor 3*32(%rdx), RXl1, RXl1; - vpxor 4*32(%rdx), RXr2, RXr2; - vpxor 5*32(%rdx), RXl2, RXl2; - vpxor 6*32(%rdx), RXr3, RXr3; - vpxor 7*32(%rdx), RXl3, RXl3; - vmovdqu RXr0, (0*32)(%rsi); - vmovdqu RXl0, (1*32)(%rsi); - vmovdqu RXr1, (2*32)(%rsi); - vmovdqu RXl1, (3*32)(%rsi); - vmovdqu RXr2, (4*32)(%rsi); - vmovdqu RXl2, (5*32)(%rsi); - vmovdqu RXr3, (6*32)(%rsi); - vmovdqu RXl3, (7*32)(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_ctr_32way) diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c deleted file mode 100644 index 4417e9aea78d..000000000000 --- a/arch/x86/crypto/blowfish_avx2_glue.c +++ /dev/null @@ -1,585 +0,0 @@ -/* - * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/algapi.h> -#include <crypto/blowfish.h> -#include <crypto/cryptd.h> -#include <crypto/ctr.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> -#include <asm/crypto/blowfish.h> -#include <asm/crypto/ablk_helper.h> -#include <crypto/scatterwalk.h> - -#define BF_AVX2_PARALLEL_BLOCKS 32 - -/* 32-way AVX2 parallel cipher functions */ -asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src, - __be64 *iv); - -static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - if (fpu_enabled) - return true; - - /* FPU is only used when chunk to be processed is large enough, so - * do not enable FPU until it is necessary. - */ - if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS) - return false; - - kernel_fpu_begin(); - return true; -} - -static inline void bf_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) -{ - bool fpu_enabled = false; - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); - - /* Process multi-block AVX2 batch */ - if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { - do { - if (enc) - blowfish_ecb_enc_32way(ctx, wdst, wsrc); - else - blowfish_ecb_dec_32way(ctx, wdst, wsrc); - - wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS; - wdst += bsize * BF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process multi-block batch */ - if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { - do { - if (enc) - blowfish_enc_blk_4way(ctx, wdst, wsrc); - else - blowfish_dec_blk_4way(ctx, wdst, wsrc); - - wsrc += bsize * BF_PARALLEL_BLOCKS; - wdst += bsize * BF_PARALLEL_BLOCKS; - nbytes -= bsize * BF_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (enc) - blowfish_enc_blk(ctx, wdst, wsrc); - else - blowfish_dec_blk(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - bf_fpu_end(fpu_enabled); - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 *iv = (u64 *)walk->iv; - - do { - *dst = *src ^ *iv; - blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 last_iv; - int i; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block AVX2 batch */ - if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1); - src -= BF_AVX2_PARALLEL_BLOCKS - 1; - dst -= BF_AVX2_PARALLEL_BLOCKS - 1; - - blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process multi-block batch */ - if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { - u64 ivs[BF_PARALLEL_BLOCKS - 1]; - - do { - nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1); - src -= BF_PARALLEL_BLOCKS - 1; - dst -= BF_PARALLEL_BLOCKS - 1; - - for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) - ivs[i] = src[i]; - - blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); - - for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) - dst[i + 1] ^= ivs[i]; - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } - -done: - *dst ^= *(u64 *)walk->iv; - *(u64 *)walk->iv = last_iv; - - return nbytes; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes)) { - fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - bf_fpu_end(fpu_enabled); - return err; -} - -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[BF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - blowfish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, BF_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - int i; - - /* Process multi-block AVX2 batch */ - if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { - do { - blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src, - (__be64 *)walk->iv); - - src += BF_AVX2_PARALLEL_BLOCKS; - dst += BF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process four block batch */ - if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { - __be64 ctrblocks[BF_PARALLEL_BLOCKS]; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - - do { - /* create ctrblks for parallel encrypt */ - for (i = 0; i < BF_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - ctrblocks[i] = cpu_to_be64(ctrblk++); - } - - blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += BF_PARALLEL_BLOCKS; - dst += BF_PARALLEL_BLOCKS; - nbytes -= bsize * BF_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); - - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - u64 ctrblk; - - if (dst != src) - *dst = *src; - - ctrblk = *(u64 *)walk->iv; - be64_add_cpu((__be64 *)walk->iv, 1); - - blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - bf_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static struct crypto_alg bf_algs[6] = { { - .cra_name = "__ecb-blowfish-avx2", - .cra_driver_name = "__driver-ecb-blowfish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-blowfish-avx2", - .cra_driver_name = "__driver-cbc-blowfish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-blowfish-avx2", - .cra_driver_name = "__driver-ctr-blowfish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-avx2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-avx2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(blowfish)", - .cra_driver_name = "ctr-blowfish-avx2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -} }; - - -static int __init init(void) -{ - u64 xcr0; - - if (!cpu_has_avx2 || !cpu_has_osxsave) { - pr_info("AVX2 instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - return -ENODEV; - } - - return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized"); -MODULE_ALIAS("blowfish"); -MODULE_ALIAS("blowfish-asm"); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 3548d76dbaa9..50ec333b70e6 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -1,7 +1,7 @@ /* * Glue Code for assembler optimized version of Blowfish * - * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> @@ -32,24 +32,40 @@ #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> -#include <asm/crypto/blowfish.h> /* regular block cipher functions */ asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, bool xor); -EXPORT_SYMBOL_GPL(__blowfish_enc_blk); - asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(blowfish_dec_blk); /* 4-way parallel cipher functions */ asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src, bool xor); -EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way); - asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way); + +static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, true); +} + +static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, true); +} static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 91a1878fcc3e..0e0b8863a34b 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -51,16 +51,6 @@ #define ymm14_x xmm14 #define ymm15_x xmm15 -/* - * AES-NI instructions do not support ymmX registers, so we need splitting and - * merging. - */ -#define vaesenclast256(zero, yreg, tmp) \ - vextracti128 $1, yreg, tmp##_x; \ - vaesenclast zero##_x, yreg##_x, yreg##_x; \ - vaesenclast zero##_x, tmp##_x, tmp##_x; \ - vinserti128 $1, tmp##_x, yreg, yreg; - /********************************************************************** 32-way camellia **********************************************************************/ @@ -79,46 +69,70 @@ * S-function with AES subbytes \ */ \ vbroadcasti128 .Linv_shift_row, t4; \ - vpbroadcastb .L0f0f0f0f, t7; \ - vbroadcasti128 .Lpre_tf_lo_s1, t0; \ - vbroadcasti128 .Lpre_tf_hi_s1, t1; \ + vpbroadcastd .L0f0f0f0f, t7; \ + vbroadcasti128 .Lpre_tf_lo_s1, t5; \ + vbroadcasti128 .Lpre_tf_hi_s1, t6; \ + vbroadcasti128 .Lpre_tf_lo_s4, t2; \ + vbroadcasti128 .Lpre_tf_hi_s4, t3; \ \ /* AES inverse shift rows */ \ vpshufb t4, x0, x0; \ vpshufb t4, x7, x7; \ - vpshufb t4, x1, x1; \ - vpshufb t4, x4, x4; \ - vpshufb t4, x2, x2; \ - vpshufb t4, x5, x5; \ vpshufb t4, x3, x3; \ vpshufb t4, x6, x6; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ \ /* prefilter sboxes 1, 2 and 3 */ \ - vbroadcasti128 .Lpre_tf_lo_s4, t2; \ - vbroadcasti128 .Lpre_tf_hi_s4, t3; \ - filter_8bit(x0, t0, t1, t7, t6); \ - filter_8bit(x7, t0, t1, t7, t6); \ - filter_8bit(x1, t0, t1, t7, t6); \ - filter_8bit(x4, t0, t1, t7, t6); \ - filter_8bit(x2, t0, t1, t7, t6); \ - filter_8bit(x5, t0, t1, t7, t6); \ - \ /* prefilter sbox 4 */ \ + filter_8bit(x0, t5, t6, t7, t4); \ + filter_8bit(x7, t5, t6, t7, t4); \ + vextracti128 $1, x0, t0##_x; \ + vextracti128 $1, x7, t1##_x; \ + filter_8bit(x3, t2, t3, t7, t4); \ + filter_8bit(x6, t2, t3, t7, t4); \ + vextracti128 $1, x3, t3##_x; \ + vextracti128 $1, x6, t2##_x; \ + filter_8bit(x2, t5, t6, t7, t4); \ + filter_8bit(x5, t5, t6, t7, t4); \ + filter_8bit(x1, t5, t6, t7, t4); \ + filter_8bit(x4, t5, t6, t7, t4); \ + \ vpxor t4##_x, t4##_x, t4##_x; \ - filter_8bit(x3, t2, t3, t7, t6); \ - filter_8bit(x6, t2, t3, t7, t6); \ \ /* AES subbytes + AES shift rows */ \ + vextracti128 $1, x2, t6##_x; \ + vextracti128 $1, x5, t5##_x; \ + vaesenclast t4##_x, x0##_x, x0##_x; \ + vaesenclast t4##_x, t0##_x, t0##_x; \ + vinserti128 $1, t0##_x, x0, x0; \ + vaesenclast t4##_x, x7##_x, x7##_x; \ + vaesenclast t4##_x, t1##_x, t1##_x; \ + vinserti128 $1, t1##_x, x7, x7; \ + vaesenclast t4##_x, x3##_x, x3##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vinserti128 $1, t3##_x, x3, x3; \ + vaesenclast t4##_x, x6##_x, x6##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t2##_x, x6, x6; \ + vextracti128 $1, x1, t3##_x; \ + vextracti128 $1, x4, t2##_x; \ vbroadcasti128 .Lpost_tf_lo_s1, t0; \ vbroadcasti128 .Lpost_tf_hi_s1, t1; \ - vaesenclast256(t4, x0, t5); \ - vaesenclast256(t4, x7, t5); \ - vaesenclast256(t4, x1, t5); \ - vaesenclast256(t4, x4, t5); \ - vaesenclast256(t4, x2, t5); \ - vaesenclast256(t4, x5, t5); \ - vaesenclast256(t4, x3, t5); \ - vaesenclast256(t4, x6, t5); \ + vaesenclast t4##_x, x2##_x, x2##_x; \ + vaesenclast t4##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x2, x2; \ + vaesenclast t4##_x, x5##_x, x5##_x; \ + vaesenclast t4##_x, t5##_x, t5##_x; \ + vinserti128 $1, t5##_x, x5, x5; \ + vaesenclast t4##_x, x1##_x, x1##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vinserti128 $1, t3##_x, x1, x1; \ + vaesenclast t4##_x, x4##_x, x4##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t2##_x, x4, x4; \ \ /* postfilter sboxes 1 and 4 */ \ vbroadcasti128 .Lpost_tf_lo_s3, t2; \ @@ -139,22 +153,12 @@ /* postfilter sbox 2 */ \ filter_8bit(x1, t4, t5, t7, t2); \ filter_8bit(x4, t4, t5, t7, t2); \ + vpxor t7, t7, t7; \ \ vpsrldq $1, t0, t1; \ vpsrldq $2, t0, t2; \ + vpshufb t7, t1, t1; \ vpsrldq $3, t0, t3; \ - vpsrldq $4, t0, t4; \ - vpsrldq $5, t0, t5; \ - vpsrldq $6, t0, t6; \ - vpsrldq $7, t0, t7; \ - vpbroadcastb t0##_x, t0; \ - vpbroadcastb t1##_x, t1; \ - vpbroadcastb t2##_x, t2; \ - vpbroadcastb t3##_x, t3; \ - vpbroadcastb t4##_x, t4; \ - vpbroadcastb t6##_x, t6; \ - vpbroadcastb t5##_x, t5; \ - vpbroadcastb t7##_x, t7; \ \ /* P-function */ \ vpxor x5, x0, x0; \ @@ -162,11 +166,21 @@ vpxor x7, x2, x2; \ vpxor x4, x3, x3; \ \ + vpshufb t7, t2, t2; \ + vpsrldq $4, t0, t4; \ + vpshufb t7, t3, t3; \ + vpsrldq $5, t0, t5; \ + vpshufb t7, t4, t4; \ + \ vpxor x2, x4, x4; \ vpxor x3, x5, x5; \ vpxor x0, x6, x6; \ vpxor x1, x7, x7; \ \ + vpsrldq $6, t0, t6; \ + vpshufb t7, t5, t5; \ + vpshufb t7, t6, t6; \ + \ vpxor x7, x0, x0; \ vpxor x4, x1, x1; \ vpxor x5, x2, x2; \ @@ -179,12 +193,16 @@ \ /* Add key material and result to CD (x becomes new CD) */ \ \ - vpxor t7, x0, x0; \ - vpxor 4 * 32(mem_cd), x0, x0; \ - \ vpxor t6, x1, x1; \ vpxor 5 * 32(mem_cd), x1, x1; \ \ + vpsrldq $7, t0, t6; \ + vpshufb t7, t0, t0; \ + vpshufb t7, t6, t7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 32(mem_cd), x0, x0; \ + \ vpxor t5, x2, x2; \ vpxor 6 * 32(mem_cd), x2, x2; \ \ @@ -204,7 +222,7 @@ vpxor 3 * 32(mem_cd), x7, x7; /* - * Size optimization... with inlined roundsm16 binary would be over 5 times + * Size optimization... with inlined roundsm32 binary would be over 5 times * larger and would only marginally faster. */ .align 8 @@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) */ \ vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ vpxor tt0, tt0, tt0; \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpand l0, t0, t0; \ vpand l1, t1, t1; \ @@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ \ vpxor l4, t0, l4; \ + vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ vmovdqu l4, 4 * 32(l); \ vpxor l5, t1, l5; \ vmovdqu l5, 5 * 32(l); \ @@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) * rl ^= t2; \ */ \ \ - vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpor 4 * 32(r), t0, t0; \ vpor 5 * 32(r), t1, t1; \ @@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vpxor 2 * 32(r), t2, t2; \ vpxor 3 * 32(r), t3, t3; \ vmovdqu t0, 0 * 32(r); \ + vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ vmovdqu t1, 1 * 32(r); \ vmovdqu t2, 2 * 32(r); \ vmovdqu t3, 3 * 32(r); \ @@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) * t2 &= rl; \ * rr ^= rol32(t2, 1); \ */ \ - vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpand 0 * 32(r), t0, t0; \ vpand 1 * 32(r), t1, t1; \ @@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vpxor 6 * 32(r), t2, t2; \ vpxor 7 * 32(r), t3, t3; \ vmovdqu t0, 4 * 32(r); \ + vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ vmovdqu t1, 5 * 32(r); \ vmovdqu t2, 6 * 32(r); \ vmovdqu t3, 7 * 32(r); \ @@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) * ll ^= t0; \ */ \ \ - vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpor l4, t0, t0; \ vpor l5, t1, t1; \ diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S new file mode 100644 index 000000000000..35e97569d05f --- /dev/null +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -0,0 +1,643 @@ +######################################################################## +# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions +# +# Copyright (c) 2013, Intel Corporation +# +# Authors: +# Erdinc Ozturk <erdinc.ozturk@intel.com> +# Vinodh Gopal <vinodh.gopal@intel.com> +# James Guilford <james.guilford@intel.com> +# Tim Chen <tim.c.chen@linux.intel.com> +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## +# Function API: +# UINT16 crc_t10dif_pcl( +# UINT16 init_crc, //initial CRC value, 16 bits +# const unsigned char *buf, //buffer pointer to calculate CRC on +# UINT64 len //buffer length in bytes (64-bit data) +# ); +# +# Reference paper titled "Fast CRC Computation for Generic +# Polynomials Using PCLMULQDQ Instruction" +# URL: http://www.intel.com/content/dam/www/public/us/en/documents +# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +# +# + +#include <linux/linkage.h> + +.text + +#define arg1 %rdi +#define arg2 %rsi +#define arg3 %rdx + +#define arg1_low32 %edi + +ENTRY(crc_t10dif_pcl) +.align 16 + + # adjust the 16-bit initial_crc value, scale it to 32 bits + shl $16, arg1_low32 + + # Allocate Stack Space + mov %rsp, %rcx + sub $16*2, %rsp + # align stack to 16 byte boundary + and $~(0x10 - 1), %rsp + + # check if smaller than 256 + cmp $256, arg3 + + # for sizes less than 128, we can't fold 64B at a time... + jl _less_than_128 + + + # load the initial crc value + movd arg1_low32, %xmm10 # initial crc + + # crc value does not need to be byte-reflected, but it needs + # to be moved to the high part of the register. + # because data will be byte-reflected and will align with + # initial crc at correct place. + pslldq $12, %xmm10 + + movdqa SHUF_MASK(%rip), %xmm11 + # receive the initial 64B data, xor the initial crc value + movdqu 16*0(arg2), %xmm0 + movdqu 16*1(arg2), %xmm1 + movdqu 16*2(arg2), %xmm2 + movdqu 16*3(arg2), %xmm3 + movdqu 16*4(arg2), %xmm4 + movdqu 16*5(arg2), %xmm5 + movdqu 16*6(arg2), %xmm6 + movdqu 16*7(arg2), %xmm7 + + pshufb %xmm11, %xmm0 + # XOR the initial_crc value + pxor %xmm10, %xmm0 + pshufb %xmm11, %xmm1 + pshufb %xmm11, %xmm2 + pshufb %xmm11, %xmm3 + pshufb %xmm11, %xmm4 + pshufb %xmm11, %xmm5 + pshufb %xmm11, %xmm6 + pshufb %xmm11, %xmm7 + + movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 + #imm value of pclmulqdq instruction + #will determine which constant to use + + ################################################################# + # we subtract 256 instead of 128 to save one instruction from the loop + sub $256, arg3 + + # at this section of the code, there is 64*x+y (0<=y<64) bytes of + # buffer. The _fold_64_B_loop will fold 64B at a time + # until we have 64+y Bytes of buffer + + + # fold 64B at a time. This section of the code folds 4 xmm + # registers in parallel +_fold_64_B_loop: + + # update the buffer pointer + add $128, arg2 # buf += 64# + + movdqu 16*0(arg2), %xmm9 + movdqu 16*1(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm0, %xmm8 + movdqa %xmm1, %xmm13 + pclmulqdq $0x0 , %xmm10, %xmm0 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0 , %xmm10, %xmm1 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm0 + xorps %xmm8 , %xmm0 + pxor %xmm12, %xmm1 + xorps %xmm13, %xmm1 + + movdqu 16*2(arg2), %xmm9 + movdqu 16*3(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm2, %xmm8 + movdqa %xmm3, %xmm13 + pclmulqdq $0x0, %xmm10, %xmm2 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0, %xmm10, %xmm3 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm2 + xorps %xmm8 , %xmm2 + pxor %xmm12, %xmm3 + xorps %xmm13, %xmm3 + + movdqu 16*4(arg2), %xmm9 + movdqu 16*5(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm4, %xmm8 + movdqa %xmm5, %xmm13 + pclmulqdq $0x0, %xmm10, %xmm4 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0, %xmm10, %xmm5 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm4 + xorps %xmm8 , %xmm4 + pxor %xmm12, %xmm5 + xorps %xmm13, %xmm5 + + movdqu 16*6(arg2), %xmm9 + movdqu 16*7(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm6 , %xmm8 + movdqa %xmm7 , %xmm13 + pclmulqdq $0x0 , %xmm10, %xmm6 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0 , %xmm10, %xmm7 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm6 + xorps %xmm8 , %xmm6 + pxor %xmm12, %xmm7 + xorps %xmm13, %xmm7 + + sub $128, arg3 + + # check if there is another 64B in the buffer to be able to fold + jge _fold_64_B_loop + ################################################################## + + + add $128, arg2 + # at this point, the buffer pointer is pointing at the last y Bytes + # of the buffer the 64B of folded data is in 4 of the xmm + # registers: xmm0, xmm1, xmm2, xmm3 + + + # fold the 8 xmm registers to 1 xmm register with different constants + + movdqa rk9(%rip), %xmm10 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm0 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm0, %xmm7 + + movdqa rk11(%rip), %xmm10 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm1 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm1, %xmm7 + + movdqa rk13(%rip), %xmm10 + movdqa %xmm2, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm2 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm2, %xmm7 + + movdqa rk15(%rip), %xmm10 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm3 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm3, %xmm7 + + movdqa rk17(%rip), %xmm10 + movdqa %xmm4, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm4 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm4, %xmm7 + + movdqa rk19(%rip), %xmm10 + movdqa %xmm5, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm5 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm5, %xmm7 + + movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 + #imm value of pclmulqdq instruction + #will determine which constant to use + movdqa %xmm6, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm6 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm6, %xmm7 + + + # instead of 64, we add 48 to the loop counter to save 1 instruction + # from the loop instead of a cmp instruction, we use the negative + # flag with the jl instruction + add $128-16, arg3 + jl _final_reduction_for_128 + + # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 + # and the rest is in memory. We can fold 16 bytes at a time if y>=16 + # continue folding 16B at a time + +_16B_reduction_loop: + movdqa %xmm7, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm7 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + movdqu (arg2), %xmm0 + pshufb %xmm11, %xmm0 + pxor %xmm0 , %xmm7 + add $16, arg2 + sub $16, arg3 + # instead of a cmp instruction, we utilize the flags with the + # jge instruction equivalent of: cmp arg3, 16-16 + # check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + #now we have 16+z bytes left to reduce, where 0<= z < 16. + #first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + # check if any more data to fold. If not, compute the CRC of + # the final 128 bits + add $16, arg3 + je _128_done + + # here we are getting data that is less than 16 bytes. + # since we know that there was data before the pointer, we can + # offset the input pointer before the actual point, to receive + # exactly 16 bytes. after that the registers need to be adjusted. +_get_last_two_xmms: + movdqa %xmm7, %xmm2 + + movdqu -16(arg2, arg3), %xmm1 + pshufb %xmm11, %xmm1 + + # get rid of the extra data that was loaded before + # load the shift constant + lea pshufb_shf_table+16(%rip), %rax + sub arg3, %rax + movdqu (%rax), %xmm0 + + # shift xmm2 to the left by arg3 bytes + pshufb %xmm0, %xmm2 + + # shift xmm7 to the right by 16-arg3 bytes + pxor mask1(%rip), %xmm0 + pshufb %xmm0, %xmm7 + pblendvb %xmm2, %xmm1 #xmm0 is implicit + + # fold 16 Bytes + movdqa %xmm1, %xmm2 + movdqa %xmm7, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm7 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm2, %xmm7 + +_128_done: + # compute crc of a 128-bit value + movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 + movdqa %xmm7, %xmm0 + + #64b fold + pclmulqdq $0x1, %xmm10, %xmm7 + pslldq $8 , %xmm0 + pxor %xmm0, %xmm7 + + #32b fold + movdqa %xmm7, %xmm0 + + pand mask2(%rip), %xmm0 + + psrldq $12, %xmm7 + pclmulqdq $0x10, %xmm10, %xmm7 + pxor %xmm0, %xmm7 + + #barrett reduction +_barrett: + movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 + movdqa %xmm7, %xmm0 + pclmulqdq $0x01, %xmm10, %xmm7 + pslldq $4, %xmm7 + pclmulqdq $0x11, %xmm10, %xmm7 + + pslldq $4, %xmm7 + pxor %xmm0, %xmm7 + pextrd $1, %xmm7, %eax + +_cleanup: + # scale the result back to 16 bits + shr $16, %eax + mov %rcx, %rsp + ret + +######################################################################## + +.align 16 +_less_than_128: + + # check if there is enough buffer to be able to fold 16B at a time + cmp $32, arg3 + jl _less_than_32 + movdqa SHUF_MASK(%rip), %xmm11 + + # now if there is, load the constants + movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 + + movd arg1_low32, %xmm0 # get the initial crc value + pslldq $12, %xmm0 # align it to its correct place + movdqu (arg2), %xmm7 # load the plaintext + pshufb %xmm11, %xmm7 # byte-reflect the plaintext + pxor %xmm0, %xmm7 + + + # update the buffer pointer + add $16, arg2 + + # update the counter. subtract 32 instead of 16 to save one + # instruction from the loop + sub $32, arg3 + + jmp _16B_reduction_loop + + +.align 16 +_less_than_32: + # mov initial crc to the return value. this is necessary for + # zero-length buffers. + mov arg1_low32, %eax + test arg3, arg3 + je _cleanup + + movdqa SHUF_MASK(%rip), %xmm11 + + movd arg1_low32, %xmm0 # get the initial crc value + pslldq $12, %xmm0 # align it to its correct place + + cmp $16, arg3 + je _exact_16_left + jl _less_than_16_left + + movdqu (arg2), %xmm7 # load the plaintext + pshufb %xmm11, %xmm7 # byte-reflect the plaintext + pxor %xmm0 , %xmm7 # xor the initial crc value + add $16, arg2 + sub $16, arg3 + movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +.align 16 +_less_than_16_left: + # use stack space to load data less than 16 bytes, zero-out + # the 16B in memory first. + + pxor %xmm1, %xmm1 + mov %rsp, %r11 + movdqa %xmm1, (%r11) + + cmp $4, arg3 + jl _only_less_than_4 + + # backup the counter value + mov arg3, %r9 + cmp $8, arg3 + jl _less_than_8_left + + # load 8 Bytes + mov (arg2), %rax + mov %rax, (%r11) + add $8, %r11 + sub $8, arg3 + add $8, arg2 +_less_than_8_left: + + cmp $4, arg3 + jl _less_than_4_left + + # load 4 Bytes + mov (arg2), %eax + mov %eax, (%r11) + add $4, %r11 + sub $4, arg3 + add $4, arg2 +_less_than_4_left: + + cmp $2, arg3 + jl _less_than_2_left + + # load 2 Bytes + mov (arg2), %ax + mov %ax, (%r11) + add $2, %r11 + sub $2, arg3 + add $2, arg2 +_less_than_2_left: + cmp $1, arg3 + jl _zero_left + + # load 1 Byte + mov (arg2), %al + mov %al, (%r11) +_zero_left: + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + # shl r9, 4 + lea pshufb_shf_table+16(%rip), %rax + sub %r9, %rax + movdqu (%rax), %xmm0 + pxor mask1(%rip), %xmm0 + + pshufb %xmm0, %xmm7 + jmp _128_done + +.align 16 +_exact_16_left: + movdqu (arg2), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp $3, arg3 + jl _only_less_than_3 + + # load 3 Bytes + mov (arg2), %al + mov %al, (%r11) + + mov 1(arg2), %al + mov %al, 1(%r11) + + mov 2(arg2), %al + mov %al, 2(%r11) + + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + psrldq $5, %xmm7 + + jmp _barrett +_only_less_than_3: + cmp $2, arg3 + jl _only_less_than_2 + + # load 2 Bytes + mov (arg2), %al + mov %al, (%r11) + + mov 1(arg2), %al + mov %al, 1(%r11) + + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + psrldq $6, %xmm7 + + jmp _barrett +_only_less_than_2: + + # load 1 Byte + mov (arg2), %al + mov %al, (%r11) + + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + psrldq $7, %xmm7 + + jmp _barrett + +ENDPROC(crc_t10dif_pcl) + +.data + +# precomputed constants +# these constants are precomputed from the poly: +# 0x8bb70000 (0x8bb7 scaled to 32 bits) +.align 16 +# Q = 0x18BB70000 +# rk1 = 2^(32*3) mod Q << 32 +# rk2 = 2^(32*5) mod Q << 32 +# rk3 = 2^(32*15) mod Q << 32 +# rk4 = 2^(32*17) mod Q << 32 +# rk5 = 2^(32*3) mod Q << 32 +# rk6 = 2^(32*2) mod Q << 32 +# rk7 = floor(2^64/Q) +# rk8 = Q +rk1: +.quad 0x2d56000000000000 +rk2: +.quad 0x06df000000000000 +rk3: +.quad 0x9d9d000000000000 +rk4: +.quad 0x7cf5000000000000 +rk5: +.quad 0x2d56000000000000 +rk6: +.quad 0x1368000000000000 +rk7: +.quad 0x00000001f65a57f8 +rk8: +.quad 0x000000018bb70000 + +rk9: +.quad 0xceae000000000000 +rk10: +.quad 0xbfd6000000000000 +rk11: +.quad 0x1e16000000000000 +rk12: +.quad 0x713c000000000000 +rk13: +.quad 0xf7f9000000000000 +rk14: +.quad 0x80a6000000000000 +rk15: +.quad 0x044c000000000000 +rk16: +.quad 0xe658000000000000 +rk17: +.quad 0xad18000000000000 +rk18: +.quad 0xa497000000000000 +rk19: +.quad 0x6ee3000000000000 +rk20: +.quad 0xe7b5000000000000 + + + +mask1: +.octa 0x80808080808080808080808080808080 +mask2: +.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF + +SHUF_MASK: +.octa 0x000102030405060708090A0B0C0D0E0F + +pshufb_shf_table: +# use these values for shift constants for the pshufb instruction +# different alignments result in values as shown: +# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 +# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 +# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 +# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 +# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 +# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 +# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 +# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 +# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 +# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 +# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 +# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 +# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 +# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 +# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 +.octa 0x8f8e8d8c8b8a89888786858483828100 +.octa 0x000e0d0c0b0a09080706050403020100 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c new file mode 100644 index 000000000000..7845d7fd54c0 --- /dev/null +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -0,0 +1,151 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions + * + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/i387.h> +#include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> + +asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, + size_t len); + +struct chksum_desc_ctx { + __u16 crc; +}; + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + if (irq_fpu_usable()) { + kernel_fpu_begin(); + ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); + kernel_fpu_end(); + } else + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + if (irq_fpu_usable()) { + kernel_fpu_begin(); + *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + kernel_fpu_end(); + } else + *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-pclmul", + .cra_priority = 200, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static const struct x86_cpu_id crct10dif_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); + +static int __init crct10dif_intel_mod_init(void) +{ + if (!x86_match_cpu(crct10dif_cpu_id)) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit crct10dif_intel_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crct10dif_intel_mod_init); +module_exit(crct10dif_intel_mod_fini); + +MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); +MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crct10dif"); +MODULE_ALIAS("crct10dif-pclmul"); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 597d4da69656..50226c4b86ed 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in) return 0; } -static struct shash_alg alg = { +static int sha224_ssse3_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_ssse3_final(desc, D); + + memcpy(hash, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); + + return 0; +} + +static struct shash_alg algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_ssse3_init, .update = sha256_ssse3_update, @@ -204,7 +233,24 @@ static struct shash_alg alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } -}; +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_ssse3_init, + .update = sha256_ssse3_update, + .final = sha224_ssse3_final, + .export = sha256_ssse3_export, + .import = sha256_ssse3_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ssse3", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) @@ -227,7 +273,7 @@ static bool __init avx_usable(void) static int __init sha256_ssse3_mod_init(void) { - /* test for SSE3 first */ + /* test for SSSE3 first */ if (cpu_has_ssse3) sha256_transform_asm = sha256_transform_ssse3; @@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void) else #endif pr_info("Using SSSE3 optimized SHA-256 implementation\n"); - return crypto_register_shash(&alg); + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } pr_info("Neither AVX nor SSSE3 is available/usable.\n"); @@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void) static void __exit sha256_ssse3_mod_fini(void) { - crypto_unregister_shash(&alg); + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } module_init(sha256_ssse3_mod_init); @@ -273,3 +319,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS("sha256"); +MODULE_ALIAS("sha384"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 6cbd8df348d2..f30cd10293f0 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in) return 0; } -static struct shash_alg alg = { +static int sha384_ssse3_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA512_DIGEST_SIZE]; + + sha512_ssse3_final(desc, D); + + memcpy(hash, D, SHA384_DIGEST_SIZE); + memset(D, 0, SHA512_DIGEST_SIZE); + + return 0; +} + +static struct shash_alg algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_ssse3_init, .update = sha512_ssse3_update, @@ -211,7 +241,24 @@ static struct shash_alg alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } -}; +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_ssse3_init, + .update = sha512_ssse3_update, + .final = sha384_ssse3_final, + .export = sha512_ssse3_export, + .import = sha512_ssse3_import, + .descsize = sizeof(struct sha512_state), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-ssse3", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) @@ -234,7 +281,7 @@ static bool __init avx_usable(void) static int __init sha512_ssse3_mod_init(void) { - /* test for SSE3 first */ + /* test for SSSE3 first */ if (cpu_has_ssse3) sha512_transform_asm = sha512_transform_ssse3; @@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void) else #endif pr_info("Using SSSE3 optimized SHA-512 implementation\n"); - return crypto_register_shash(&alg); + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } pr_info("Neither AVX nor SSSE3 is available/usable.\n"); @@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void) static void __exit sha512_ssse3_mod_fini(void) { - crypto_unregister_shash(&alg); + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } module_init(sha512_ssse3_mod_init); @@ -280,3 +327,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS("sha512"); +MODULE_ALIAS("sha384"); diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S deleted file mode 100644 index e1a83b9cd389..000000000000 --- a/arch/x86/crypto/twofish-avx2-asm_64.S +++ /dev/null @@ -1,600 +0,0 @@ -/* - * x86_64/AVX2 assembler optimized version of Twofish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/linkage.h> -#include "glue_helper-asm-avx2.S" - -.file "twofish-avx2-asm_64.S" - -.data -.align 16 - -.Lvpshufb_mask0: -.long 0x80808000 -.long 0x80808004 -.long 0x80808008 -.long 0x8080800c - -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - -.text - -/* structure of crypto context */ -#define s0 0 -#define s1 1024 -#define s2 2048 -#define s3 3072 -#define w 4096 -#define k 4128 - -/* register macros */ -#define CTX %rdi - -#define RS0 CTX -#define RS1 %r8 -#define RS2 %r9 -#define RS3 %r10 -#define RK %r11 -#define RW %rax -#define RROUND %r12 -#define RROUNDd %r12d - -#define RA0 %ymm8 -#define RB0 %ymm9 -#define RC0 %ymm10 -#define RD0 %ymm11 -#define RA1 %ymm12 -#define RB1 %ymm13 -#define RC1 %ymm14 -#define RD1 %ymm15 - -/* temp regs */ -#define RX0 %ymm0 -#define RY0 %ymm1 -#define RX1 %ymm2 -#define RY1 %ymm3 -#define RT0 %ymm4 -#define RIDX %ymm5 - -#define RX0x %xmm0 -#define RY0x %xmm1 -#define RX1x %xmm2 -#define RY1x %xmm3 -#define RT0x %xmm4 - -/* vpgatherdd mask and '-1' */ -#define RNOT %ymm6 - -/* byte mask, (-1 >> 24) */ -#define RBYTE %ymm7 - -/********************************************************************** - 16-way AVX2 twofish - **********************************************************************/ -#define init_round_constants() \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpsrld $24, RNOT, RBYTE; \ - leaq k(CTX), RK; \ - leaq w(CTX), RW; \ - leaq s1(CTX), RS1; \ - leaq s2(CTX), RS2; \ - leaq s3(CTX), RS3; \ - -#define g16(ab, rs0, rs1, rs2, rs3, xy) \ - vpand RBYTE, ab ## 0, RIDX; \ - vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - \ - vpand RBYTE, ab ## 1, RIDX; \ - vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - \ - vpsrld $8, ab ## 0, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 0, xy ## 0; \ - \ - vpsrld $8, ab ## 1, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 1, xy ## 1; \ - \ - vpsrld $16, ab ## 0, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 0, xy ## 0; \ - \ - vpsrld $16, ab ## 1, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 1, xy ## 1; \ - \ - vpsrld $24, ab ## 0, RIDX; \ - vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 0, xy ## 0; \ - \ - vpsrld $24, ab ## 1, RIDX; \ - vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 1, xy ## 1; - -#define g1_16(a, x) \ - g16(a, RS0, RS1, RS2, RS3, x); - -#define g2_16(b, y) \ - g16(b, RS1, RS2, RS3, RS0, y); - -#define encrypt_round_end16(a, b, c, d, nk) \ - vpaddd RY0, RX0, RX0; \ - vpaddd RX0, RY0, RY0; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX0, RX0; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY0, RY0; \ - \ - vpxor RY0, d ## 0, d ## 0; \ - \ - vpxor RX0, c ## 0, c ## 0; \ - vpsrld $1, c ## 0, RT0; \ - vpslld $31, c ## 0, c ## 0; \ - vpor RT0, c ## 0, c ## 0; \ - \ - vpaddd RY1, RX1, RX1; \ - vpaddd RX1, RY1, RY1; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX1, RX1; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY1, RY1; \ - \ - vpxor RY1, d ## 1, d ## 1; \ - \ - vpxor RX1, c ## 1, c ## 1; \ - vpsrld $1, c ## 1, RT0; \ - vpslld $31, c ## 1, c ## 1; \ - vpor RT0, c ## 1, c ## 1; \ - -#define encrypt_round16(a, b, c, d, nk) \ - g2_16(b, RY); \ - \ - vpslld $1, b ## 0, RT0; \ - vpsrld $31, b ## 0, b ## 0; \ - vpor RT0, b ## 0, b ## 0; \ - \ - vpslld $1, b ## 1, RT0; \ - vpsrld $31, b ## 1, b ## 1; \ - vpor RT0, b ## 1, b ## 1; \ - \ - g1_16(a, RX); \ - \ - encrypt_round_end16(a, b, c, d, nk); - -#define encrypt_round_first16(a, b, c, d, nk) \ - vpslld $1, d ## 0, RT0; \ - vpsrld $31, d ## 0, d ## 0; \ - vpor RT0, d ## 0, d ## 0; \ - \ - vpslld $1, d ## 1, RT0; \ - vpsrld $31, d ## 1, d ## 1; \ - vpor RT0, d ## 1, d ## 1; \ - \ - encrypt_round16(a, b, c, d, nk); - -#define encrypt_round_last16(a, b, c, d, nk) \ - g2_16(b, RY); \ - \ - g1_16(a, RX); \ - \ - encrypt_round_end16(a, b, c, d, nk); - -#define decrypt_round_end16(a, b, c, d, nk) \ - vpaddd RY0, RX0, RX0; \ - vpaddd RX0, RY0, RY0; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX0, RX0; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY0, RY0; \ - \ - vpxor RX0, c ## 0, c ## 0; \ - \ - vpxor RY0, d ## 0, d ## 0; \ - vpsrld $1, d ## 0, RT0; \ - vpslld $31, d ## 0, d ## 0; \ - vpor RT0, d ## 0, d ## 0; \ - \ - vpaddd RY1, RX1, RX1; \ - vpaddd RX1, RY1, RY1; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX1, RX1; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY1, RY1; \ - \ - vpxor RX1, c ## 1, c ## 1; \ - \ - vpxor RY1, d ## 1, d ## 1; \ - vpsrld $1, d ## 1, RT0; \ - vpslld $31, d ## 1, d ## 1; \ - vpor RT0, d ## 1, d ## 1; - -#define decrypt_round16(a, b, c, d, nk) \ - g1_16(a, RX); \ - \ - vpslld $1, a ## 0, RT0; \ - vpsrld $31, a ## 0, a ## 0; \ - vpor RT0, a ## 0, a ## 0; \ - \ - vpslld $1, a ## 1, RT0; \ - vpsrld $31, a ## 1, a ## 1; \ - vpor RT0, a ## 1, a ## 1; \ - \ - g2_16(b, RY); \ - \ - decrypt_round_end16(a, b, c, d, nk); - -#define decrypt_round_first16(a, b, c, d, nk) \ - vpslld $1, c ## 0, RT0; \ - vpsrld $31, c ## 0, c ## 0; \ - vpor RT0, c ## 0, c ## 0; \ - \ - vpslld $1, c ## 1, RT0; \ - vpsrld $31, c ## 1, c ## 1; \ - vpor RT0, c ## 1, c ## 1; \ - \ - decrypt_round16(a, b, c, d, nk) - -#define decrypt_round_last16(a, b, c, d, nk) \ - g1_16(a, RX); \ - \ - g2_16(b, RY); \ - \ - decrypt_round_end16(a, b, c, d, nk); - -#define encrypt_cycle16() \ - encrypt_round16(RA, RB, RC, RD, 0); \ - encrypt_round16(RC, RD, RA, RB, 8); - -#define encrypt_cycle_first16() \ - encrypt_round_first16(RA, RB, RC, RD, 0); \ - encrypt_round16(RC, RD, RA, RB, 8); - -#define encrypt_cycle_last16() \ - encrypt_round16(RA, RB, RC, RD, 0); \ - encrypt_round_last16(RC, RD, RA, RB, 8); - -#define decrypt_cycle16(n) \ - decrypt_round16(RC, RD, RA, RB, 8); \ - decrypt_round16(RA, RB, RC, RD, 0); - -#define decrypt_cycle_first16(n) \ - decrypt_round_first16(RC, RD, RA, RB, 8); \ - decrypt_round16(RA, RB, RC, RD, 0); - -#define decrypt_cycle_last16(n) \ - decrypt_round16(RC, RD, RA, RB, 8); \ - decrypt_round_last16(RA, RB, RC, RD, 0); - -#define transpose_4x4(x0,x1,x2,x3,t1,t2) \ - vpunpckhdq x1, x0, t2; \ - vpunpckldq x1, x0, x0; \ - \ - vpunpckldq x3, x2, t1; \ - vpunpckhdq x3, x2, x2; \ - \ - vpunpckhqdq t1, x0, x1; \ - vpunpcklqdq t1, x0, x0; \ - \ - vpunpckhqdq x2, t2, x3; \ - vpunpcklqdq x2, t2, x2; - -#define read_blocks8(offs,a,b,c,d) \ - transpose_4x4(a, b, c, d, RX0, RY0); - -#define write_blocks8(offs,a,b,c,d) \ - transpose_4x4(a, b, c, d, RX0, RY0); - -#define inpack_enc8(a,b,c,d) \ - vpbroadcastd 4*0(RW), RT0; \ - vpxor RT0, a, a; \ - \ - vpbroadcastd 4*1(RW), RT0; \ - vpxor RT0, b, b; \ - \ - vpbroadcastd 4*2(RW), RT0; \ - vpxor RT0, c, c; \ - \ - vpbroadcastd 4*3(RW), RT0; \ - vpxor RT0, d, d; - -#define outunpack_enc8(a,b,c,d) \ - vpbroadcastd 4*4(RW), RX0; \ - vpbroadcastd 4*5(RW), RY0; \ - vpxor RX0, c, RX0; \ - vpxor RY0, d, RY0; \ - \ - vpbroadcastd 4*6(RW), RT0; \ - vpxor RT0, a, c; \ - vpbroadcastd 4*7(RW), RT0; \ - vpxor RT0, b, d; \ - \ - vmovdqa RX0, a; \ - vmovdqa RY0, b; - -#define inpack_dec8(a,b,c,d) \ - vpbroadcastd 4*4(RW), RX0; \ - vpbroadcastd 4*5(RW), RY0; \ - vpxor RX0, a, RX0; \ - vpxor RY0, b, RY0; \ - \ - vpbroadcastd 4*6(RW), RT0; \ - vpxor RT0, c, a; \ - vpbroadcastd 4*7(RW), RT0; \ - vpxor RT0, d, b; \ - \ - vmovdqa RX0, c; \ - vmovdqa RY0, d; - -#define outunpack_dec8(a,b,c,d) \ - vpbroadcastd 4*0(RW), RT0; \ - vpxor RT0, a, a; \ - \ - vpbroadcastd 4*1(RW), RT0; \ - vpxor RT0, b, b; \ - \ - vpbroadcastd 4*2(RW), RT0; \ - vpxor RT0, c, c; \ - \ - vpbroadcastd 4*3(RW), RT0; \ - vpxor RT0, d, d; - -#define read_blocks16(a,b,c,d) \ - read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ - read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); - -#define write_blocks16(a,b,c,d) \ - write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ - write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); - -#define xor_blocks16(a,b,c,d) \ - xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ - xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); - -#define inpack_enc16(a,b,c,d) \ - inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ - inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); - -#define outunpack_enc16(a,b,c,d) \ - outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ - outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); - -#define inpack_dec16(a,b,c,d) \ - inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ - inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); - -#define outunpack_dec16(a,b,c,d) \ - outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ - outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); - -.align 8 -__twofish_enc_blk16: - /* input: - * %rdi: ctx, CTX - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext - * output: - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext - */ - init_round_constants(); - - read_blocks16(RA, RB, RC, RD); - inpack_enc16(RA, RB, RC, RD); - - xorl RROUNDd, RROUNDd; - encrypt_cycle_first16(); - movl $2, RROUNDd; - -.align 4 -.L__enc_loop: - encrypt_cycle16(); - - addl $2, RROUNDd; - cmpl $14, RROUNDd; - jne .L__enc_loop; - - encrypt_cycle_last16(); - - outunpack_enc16(RA, RB, RC, RD); - write_blocks16(RA, RB, RC, RD); - - ret; -ENDPROC(__twofish_enc_blk16) - -.align 8 -__twofish_dec_blk16: - /* input: - * %rdi: ctx, CTX - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext - * output: - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext - */ - init_round_constants(); - - read_blocks16(RA, RB, RC, RD); - inpack_dec16(RA, RB, RC, RD); - - movl $14, RROUNDd; - decrypt_cycle_first16(); - movl $12, RROUNDd; - -.align 4 -.L__dec_loop: - decrypt_cycle16(); - - addl $-2, RROUNDd; - jnz .L__dec_loop; - - decrypt_cycle_last16(); - - outunpack_dec16(RA, RB, RC, RD); - write_blocks16(RA, RB, RC, RD); - - ret; -ENDPROC(__twofish_dec_blk16) - -ENTRY(twofish_ecb_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - pushq %r12; - - load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - call __twofish_enc_blk16; - - store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_ecb_enc_16way) - -ENTRY(twofish_ecb_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - pushq %r12; - - load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - call __twofish_dec_blk16; - - store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_ecb_dec_16way) - -ENTRY(twofish_cbc_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - pushq %r12; - - load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - call __twofish_dec_blk16; - - store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1, - RX0); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_cbc_dec_16way) - -ENTRY(twofish_ctr_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (little endian, 128bit) - */ - - vzeroupper; - pushq %r12; - - load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1, - RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, - RBYTE); - - call __twofish_enc_blk16; - - store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_ctr_16way) - -.align 8 -twofish_xts_crypt_16way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16 - */ - - vzeroupper; - pushq %r12; - - load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, - RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, - .Lxts_gf128mul_and_shl1_mask_0, - .Lxts_gf128mul_and_shl1_mask_1); - - call *%r8; - - store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_xts_crypt_16way) - -ENTRY(twofish_xts_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - leaq __twofish_enc_blk16, %r8; - jmp twofish_xts_crypt_16way; -ENDPROC(twofish_xts_enc_16way) - -ENTRY(twofish_xts_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - leaq __twofish_dec_blk16, %r8; - jmp twofish_xts_crypt_16way; -ENDPROC(twofish_xts_dec_16way) diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c deleted file mode 100644 index ce33b5be64ee..000000000000 --- a/arch/x86/crypto/twofish_avx2_glue.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Glue Code for x86_64/AVX2 assembler optimized version of Twofish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/twofish.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> -#include <asm/crypto/twofish.h> -#include <asm/crypto/ablk_helper.h> -#include <asm/crypto/glue_helper.h> -#include <crypto/scatterwalk.h> - -#define TF_AVX2_PARALLEL_BLOCKS 16 - -/* 16-way AVX2 parallel cipher functions */ -asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); - -asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, false); -} - -static const struct common_glue_ctx twofish_enc = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) } - }, { - .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } - }, { - .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } - }, { - .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } - } } -}; - -static const struct common_glue_ctx twofish_ctr = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) } - }, { - .num_blocks = 8, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } - }, { - .num_blocks = 3, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } - }, { - .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } - } } -}; - -static const struct common_glue_ctx twofish_enc_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) } - }, { - .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } - }, { - .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } - } } -}; - -static const struct common_glue_ctx twofish_dec = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) } - }, { - .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } - }, { - .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } - }, { - .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } - } } -}; - -static const struct common_glue_ctx twofish_dec_cbc = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) } - }, { - .num_blocks = 8, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } - }, { - .num_blocks = 3, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } - }, { - .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } - } } -}; - -static const struct common_glue_ctx twofish_dec_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) } - }, { - .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } - }, { - .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } - } } -}; - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); -} - -static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ - return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); -} - -static inline void twofish_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct twofish_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { - twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= 8 * bsize) { - twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 8; - nbytes -= bsize * 8; - } - - while (nbytes >= 3 * bsize) { - twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 3; - nbytes -= bsize * 3; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { - twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= 8 * bsize) { - twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 8; - nbytes -= bsize * 8; - } - - while (nbytes >= 3 * bsize) { - twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 3; - nbytes -= bsize * 3; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TF_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TF_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg tf_algs[10] = { { - .cra_name = "__ecb-twofish-avx2", - .cra_driver_name = "__driver-ecb-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-twofish-avx2", - .cra_driver_name = "__driver-cbc-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-twofish-avx2", - .cra_driver_name = "__driver-ctr-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-twofish-avx2", - .cra_driver_name = "__driver-lrw-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-twofish-avx2", - .cra_driver_name = "__driver-xts-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; - -static int __init init(void) -{ - u64 xcr0; - - if (!cpu_has_avx2 || !cpu_has_osxsave) { - pr_info("AVX2 instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX2 detected but unusable.\n"); - return -ENODEV; - } - - return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 2047a562f6b3..a62ba541884e 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -50,26 +50,18 @@ /* 8-way parallel cipher functions */ asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way); - asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way); asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way); - asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(twofish_ctr_8way); asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(twofish_xts_enc_8way); asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(twofish_xts_dec_8way); static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) @@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, __twofish_enc_blk_3way(ctx, dst, src, false); } -void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(twofish_enc_blk)); } -EXPORT_SYMBOL_GPL(twofish_xts_enc); -void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(twofish_dec_blk)); } -EXPORT_SYMBOL_GPL(twofish_xts_dec); static const struct common_glue_ctx twofish_enc = { diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h deleted file mode 100644 index f097b2face10..000000000000 --- a/arch/x86/include/asm/crypto/blowfish.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef ASM_X86_BLOWFISH_H -#define ASM_X86_BLOWFISH_H - -#include <linux/crypto.h> -#include <crypto/blowfish.h> - -#define BF_PARALLEL_BLOCKS 4 - -/* regular block cipher functions */ -asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, - bool xor); -asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); - -/* 4-way parallel cipher functions */ -asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, false); -} - -static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, true); -} - -static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, false); -} - -static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, true); -} - -#endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index e655c6029b45..878c51ceebb5 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -/* 8-way parallel cipher functions */ -asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - /* helpers from twofish_x86_64-3way module */ extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, @@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); -/* helpers from twofish-avx module */ -extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); - #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index d354fb781c57..a55c7efcc4ed 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(unsigned long nowtime); -extern unsigned long mach_get_cmos_time(void); +extern int mach_set_rtc_mmss(const struct timespec *now); +extern void mach_get_cmos_time(struct timespec *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h index 73668abdbedf..1e69a75412a4 100644 --- a/arch/x86/include/asm/mrst-vrtc.h +++ b/arch/x86/include/asm/mrst-vrtc.h @@ -3,7 +3,7 @@ extern unsigned char vrtc_cmos_read(unsigned char reg); extern void vrtc_cmos_write(unsigned char val, unsigned char reg); -extern unsigned long vrtc_get_time(void); -extern int vrtc_set_mmss(unsigned long nowtime); +extern void vrtc_get_time(struct timespec *now); +extern int vrtc_set_mmss(const struct timespec *now); #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d8d99222b36a..828a1565ba57 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,8 @@ struct x86_cpuinit_ops { void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; +struct timespec; + /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC @@ -156,8 +158,8 @@ struct x86_cpuinit_ops { */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long nowtime); + void (*get_wallclock)(struct timespec *ts); + int (*set_wallclock)(const struct timespec *ts); void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b1581527a236..4934890e4db2 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -364,9 +364,7 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num, * and assigned so we can keep the 1:1 mapping which the ioapic * is having. */ - ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); - if (ret) - pr_err("Error mapping legacy IRQs: %d\n", ret); + irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); if (num > NR_IRQS_LEGACY) { ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3dd37ebd591b..1f354f4b602b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock; * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -static unsigned long kvm_get_wallclock(void) +static void kvm_get_wallclock(struct timespec *now) { struct pvclock_vcpu_time_info *vcpu_time; - struct timespec ts; int low, high; int cpu; @@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void) cpu = smp_processor_id(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + pvclock_read_wallclock(&wall_clock, vcpu_time, now); preempt_enable(); - - return ts.tv_sec; } -static int kvm_set_wallclock(unsigned long now) +static int kvm_set_wallclock(const struct timespec *now) { return -1; } diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 198eb201ed3b..0aa29394ed6f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock); * jump to the next second precisely 500 ms later. Check the Motorola * MC146818A or Dallas DS12887 data sheet for details. */ -int mach_set_rtc_mmss(unsigned long nowtime) +int mach_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; struct rtc_time tm; int retval = 0; @@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) return retval; } -unsigned long mach_get_cmos_time(void) +void mach_get_cmos_time(struct timespec *now) { unsigned int status, year, mon, day, hour, min, sec, century = 0; unsigned long flags; @@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void) } else year += CMOS_YEARS_OFFS; - return mktime(year, mon, day, hour, min, sec); + now->tv_sec = mktime(year, mon, day, hour, min, sec); + now->tv_nsec = 0; } /* Routines for accessing the CMOS RAM/RTC. */ @@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write); int update_persistent_clock(struct timespec now) { - return x86_platform.set_wallclock(now.tv_sec); + return x86_platform.set_wallclock(&now); } /* not static: needed by APM */ void read_persistent_clock(struct timespec *ts) { - unsigned long retval; - - retval = x86_platform.get_wallclock(); - - ts->tv_sec = retval; - ts->tv_nsec = 0; + x86_platform.get_wallclock(ts); } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index d482bcaf61c1..6a22c19da663 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq) * It would be far better for everyone if the Guest had its own clock, but * until then the Host gives us the time on every interrupt. */ -static unsigned long lguest_get_wallclock(void) +static void lguest_get_wallclock(struct timespec *now) { - return lguest_data.time.tv_sec; + *now = lguest_data.time; } /* diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b410b71bdcf7..c8d5577044bb 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -274,8 +274,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, return status; } -int efi_set_rtc_mmss(unsigned long nowtime) +int efi_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; efi_status_t status; efi_time_t eft; efi_time_cap_t cap; @@ -310,7 +311,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -unsigned long efi_get_time(void) +void efi_get_time(struct timespec *now) { efi_status_t status; efi_time_t eft; @@ -320,8 +321,9 @@ unsigned long efi_get_time(void) if (status != EFI_SUCCESS) pr_err("Oops: efitime: can't read time!\n"); - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); + now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, + eft.minute, eft.second); + now->tv_nsec = 0; } /* diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c index d62b0a3b5c14..5e355b134ba4 100644 --- a/arch/x86/platform/mrst/vrtc.c +++ b/arch/x86/platform/mrst/vrtc.c @@ -56,7 +56,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg) } EXPORT_SYMBOL_GPL(vrtc_cmos_write); -unsigned long vrtc_get_time(void) +void vrtc_get_time(struct timespec *now) { u8 sec, min, hour, mday, mon; unsigned long flags; @@ -82,17 +82,18 @@ unsigned long vrtc_get_time(void) printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " "mon: %d year: %d\n", sec, min, hour, mday, mon, year); - return mktime(year, mon, mday, hour, min, sec); + now->tv_sec = mktime(year, mon, mday, hour, min, sec); + now->tv_nsec = 0; } -int vrtc_set_mmss(unsigned long nowtime) +int vrtc_set_mmss(const struct timespec *now) { unsigned long flags; struct rtc_time tm; int year; int retval = 0; - rtc_time_to_tm(nowtime, &tm); + rtc_time_to_tm(now->tv_sec, &tm); if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) { /* * tm.year is the number of years since 1900, and the @@ -110,7 +111,7 @@ int vrtc_set_mmss(unsigned long nowtime) } else { printk(KERN_ERR "%s: Invalid vRTC value: write of %lx to vRTC failed\n", - __FUNCTION__, nowtime); + __FUNCTION__, now->tv_sec); retval = -EINVAL; } return retval; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index a690868be837..ee365895b06b 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -15,6 +15,7 @@ #include <linux/math64.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/pvclock_gtod.h> #include <asm/pvclock.h> #include <asm/xen/hypervisor.h> @@ -179,34 +180,56 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -static unsigned long xen_get_wallclock(void) +static void xen_get_wallclock(struct timespec *now) { - struct timespec ts; + xen_read_wallclock(now); +} - xen_read_wallclock(&ts); - return ts.tv_sec; +static int xen_set_wallclock(const struct timespec *now) +{ + return -1; } -static int xen_set_wallclock(unsigned long now) +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) { + /* Protected by the calling core code serialization */ + static struct timespec next_sync; + struct xen_platform_op op; - int rc; + struct timespec now; - /* do nothing for domU */ - if (!xen_initial_domain()) - return -1; + now = __current_kernel_time(); + + /* + * We only take the expensive HV call when the clock was set + * or when the 11 minutes RTC synchronization time elapsed. + */ + if (!was_set && timespec_compare(&now, &next_sync) < 0) + return NOTIFY_OK; op.cmd = XENPF_settime; - op.u.settime.secs = now; - op.u.settime.nsecs = 0; + op.u.settime.secs = now.tv_sec; + op.u.settime.nsecs = now.tv_nsec; op.u.settime.system_time = xen_clocksource_read(); - rc = HYPERVISOR_dom0_op(&op); - WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); + (void)HYPERVISOR_dom0_op(&op); - return rc; + /* + * Move the next drift compensation time 11 minutes + * ahead. That's emulating the sync_cmos_clock() update for + * the hardware RTC. + */ + next_sync = now; + next_sync.tv_sec += 11 * 60; + + return NOTIFY_OK; } +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, @@ -482,6 +505,9 @@ static void __init xen_time_init(void) xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } void __init xen_init_time_ops(void) @@ -494,7 +520,9 @@ void __init xen_init_time_ops(void) x86_platform.calibrate_tsc = xen_tsc_khz; x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; + /* Dom0 uses the native method to set the hardware RTC. */ + if (!xen_initial_domain()) + x86_platform.set_wallclock = xen_set_wallclock; } #ifdef CONFIG_XEN_PVHVM diff --git a/crypto/Kconfig b/crypto/Kconfig index bf8148e74e73..904ffe838567 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL which will enable any routine to use the CRC-32-IEEE 802.3 checksum and gain better performance as compared with the table implementation. +config CRYPTO_CRCT10DIF + tristate "CRCT10DIF algorithm" + select CRYPTO_HASH + help + CRC T10 Data Integrity Field computation is being cast as + a crypto transform. This allows for faster crc t10 diff + transforms to be used if they are available. + +config CRYPTO_CRCT10DIF_PCLMUL + tristate "CRCT10DIF PCLMULQDQ hardware acceleration" + depends on X86 && 64BIT && CRC_T10DIF + select CRYPTO_HASH + help + For x86_64 processors with SSE4.2 and PCLMULQDQ supported, + CRC T10 DIF PCLMULQDQ computation can be hardware + accelerated PCLMULQDQ instruction. This option will create + 'crct10dif-plcmul' module, which is faster when computing the + crct10dif checksum as compared with the generic table implementation. + config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL @@ -820,25 +839,6 @@ config CRYPTO_BLOWFISH_X86_64 See also: <http://www.schneier.com/blowfish.html> -config CRYPTO_BLOWFISH_AVX2_X86_64 - tristate "Blowfish cipher algorithm (x86_64/AVX2)" - depends on X86 && 64BIT - depends on BROKEN - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER_X86 - select CRYPTO_BLOWFISH_COMMON - select CRYPTO_BLOWFISH_X86_64 - help - Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier. - - This is a variable key length cipher which can use keys from 32 - bits to 448 bits in length. It's fast, simple and specifically - designed for use on "large microprocessors". - - See also: - <http://www.schneier.com/blowfish.html> - config CRYPTO_CAMELLIA tristate "Camellia cipher algorithms" depends on CRYPTO @@ -1297,31 +1297,6 @@ config CRYPTO_TWOFISH_AVX_X86_64 See also: <http://www.schneier.com/twofish.html> -config CRYPTO_TWOFISH_AVX2_X86_64 - tristate "Twofish cipher algorithm (x86_64/AVX2)" - depends on X86 && 64BIT - depends on BROKEN - select CRYPTO_ALGAPI - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER_X86 - select CRYPTO_GLUE_HELPER_X86 - select CRYPTO_TWOFISH_COMMON - select CRYPTO_TWOFISH_X86_64 - select CRYPTO_TWOFISH_X86_64_3WAY - select CRYPTO_TWOFISH_AVX_X86_64 - select CRYPTO_LRW - select CRYPTO_XTS - help - Twofish cipher algorithm (x86_64/AVX2). - - Twofish was submitted as an AES (Advanced Encryption Standard) - candidate cipher by researchers at CounterPane Systems. It is a - 16 round block cipher supporting key sizes of 128, 192, and 256 - bits. - - See also: - <http://www.schneier.com/twofish.html> - comment "Compression" config CRYPTO_DEFLATE diff --git a/crypto/Makefile b/crypto/Makefile index a8e9b0fefbe9..62af87df8729 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o +obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_842) += 842.o diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c new file mode 100644 index 000000000000..92aca96d6b98 --- /dev/null +++ b/crypto/crct10dif.c @@ -0,0 +1,178 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform + * + * Copyright (c) 2007 Oracle Corporation. All rights reserved. + * Written by Martin K. Petersen <martin.petersen@oracle.com> + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/kernel.h> + +struct chksum_desc_ctx { + __u16 crc; +}; + +/* Table generated using the following polynomium: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const __u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) +{ + unsigned int i; + + for (i = 0 ; i < len ; i++) + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + + return crc; +} +EXPORT_SYMBOL(crc_t10dif_generic); + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-generic", + .cra_priority = 100, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init crct10dif_mod_init(void) +{ + int ret; + + ret = crypto_register_shash(&alg); + return ret; +} + +static void __exit crct10dif_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crct10dif_mod_init); +module_exit(crct10dif_mod_fini); + +MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); +MODULE_DESCRIPTION("T10 DIF CRC calculation."); +MODULE_LICENSE("GPL"); diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 4c5862095679..6ed124f3ea0f 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -251,6 +251,7 @@ static struct shash_alg sha512_algs[2] = { { .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", + .cra_driver_name = "sha512-generic", .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -263,6 +264,7 @@ static struct shash_alg sha512_algs[2] = { { .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", + .cra_driver_name = "sha384-generic", .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_module = THIS_MODULE, diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 66d254ce0d11..25a5934f0e50 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1174,6 +1174,10 @@ static int do_test(int m) ret += tcrypt_test("ghash"); break; + case 47: + ret += tcrypt_test("crct10dif"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1498,6 +1502,10 @@ static int do_test(int m) test_hash_speed("crc32c", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; + case 320: + test_hash_speed("crct10dif", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + case 399: break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5823735cf381..2f00607039e2 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -184,8 +184,9 @@ static int do_one_async_hash_op(struct ahash_request *req, return ret; } -static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, - unsigned int tcount, bool use_digest) +static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, + unsigned int tcount, bool use_digest, + const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); unsigned int i, j, k, temp; @@ -216,10 +217,15 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, if (template[i].np) continue; + ret = -EINVAL; + if (WARN_ON(align_offset + template[i].psize > PAGE_SIZE)) + goto out; + j++; memset(result, 0, 64); hash_buff = xbuf[0]; + hash_buff += align_offset; memcpy(hash_buff, template[i].plaintext, template[i].psize); sg_init_one(&sg[0], hash_buff, template[i].psize); @@ -281,6 +287,10 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, j = 0; for (i = 0; i < tcount; i++) { + /* alignment tests are only done with continuous buffers */ + if (align_offset != 0) + break; + if (template[i].np) { j++; memset(result, 0, 64); @@ -358,9 +368,36 @@ out_nobuf: return ret; } +static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, + unsigned int tcount, bool use_digest) +{ + unsigned int alignmask; + int ret; + + ret = __test_hash(tfm, template, tcount, use_digest, 0); + if (ret) + return ret; + + /* test unaligned buffers, check with one byte offset */ + ret = __test_hash(tfm, template, tcount, use_digest, 1); + if (ret) + return ret; + + alignmask = crypto_tfm_alg_alignmask(&tfm->base); + if (alignmask) { + /* Check if alignment mask for tfm is correctly set. */ + ret = __test_hash(tfm, template, tcount, use_digest, + alignmask + 1); + if (ret) + return ret; + } + + return 0; +} + static int __test_aead(struct crypto_aead *tfm, int enc, struct aead_testvec *template, unsigned int tcount, - const bool diff_dst) + const bool diff_dst, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); unsigned int i, j, k, n, temp; @@ -423,15 +460,16 @@ static int __test_aead(struct crypto_aead *tfm, int enc, if (!template[i].np) { j++; - /* some tepmplates have no input data but they will + /* some templates have no input data but they will * touch input */ input = xbuf[0]; + input += align_offset; assoc = axbuf[0]; ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE || - template[i].alen > PAGE_SIZE)) + if (WARN_ON(align_offset + template[i].ilen > + PAGE_SIZE || template[i].alen > PAGE_SIZE)) goto out; memcpy(input, template[i].input, template[i].ilen); @@ -470,6 +508,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, if (diff_dst) { output = xoutbuf[0]; + output += align_offset; sg_init_one(&sgout[0], output, template[i].ilen + (enc ? authsize : 0)); @@ -530,6 +569,10 @@ static int __test_aead(struct crypto_aead *tfm, int enc, } for (i = 0, j = 0; i < tcount; i++) { + /* alignment tests are only done with continuous buffers */ + if (align_offset != 0) + break; + if (template[i].np) { j++; @@ -732,15 +775,34 @@ out_noxbuf: static int test_aead(struct crypto_aead *tfm, int enc, struct aead_testvec *template, unsigned int tcount) { + unsigned int alignmask; int ret; /* test 'dst == src' case */ - ret = __test_aead(tfm, enc, template, tcount, false); + ret = __test_aead(tfm, enc, template, tcount, false, 0); if (ret) return ret; /* test 'dst != src' case */ - return __test_aead(tfm, enc, template, tcount, true); + ret = __test_aead(tfm, enc, template, tcount, true, 0); + if (ret) + return ret; + + /* test unaligned buffers, check with one byte offset */ + ret = __test_aead(tfm, enc, template, tcount, true, 1); + if (ret) + return ret; + + alignmask = crypto_tfm_alg_alignmask(&tfm->base); + if (alignmask) { + /* Check if alignment mask for tfm is correctly set. */ + ret = __test_aead(tfm, enc, template, tcount, true, + alignmask + 1); + if (ret) + return ret; + } + + return 0; } static int test_cipher(struct crypto_cipher *tfm, int enc, @@ -820,7 +882,7 @@ out_nobuf: static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, struct cipher_testvec *template, unsigned int tcount, - const bool diff_dst) + const bool diff_dst, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)); @@ -876,10 +938,12 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, j++; ret = -EINVAL; - if (WARN_ON(template[i].ilen > PAGE_SIZE)) + if (WARN_ON(align_offset + template[i].ilen > + PAGE_SIZE)) goto out; data = xbuf[0]; + data += align_offset; memcpy(data, template[i].input, template[i].ilen); crypto_ablkcipher_clear_flags(tfm, ~0); @@ -900,6 +964,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, sg_init_one(&sg[0], data, template[i].ilen); if (diff_dst) { data = xoutbuf[0]; + data += align_offset; sg_init_one(&sgout[0], data, template[i].ilen); } @@ -941,6 +1006,9 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, j = 0; for (i = 0; i < tcount; i++) { + /* alignment tests are only done with continuous buffers */ + if (align_offset != 0) + break; if (template[i].iv) memcpy(iv, template[i].iv, MAX_IVLEN); @@ -1075,15 +1143,34 @@ out_nobuf: static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, struct cipher_testvec *template, unsigned int tcount) { + unsigned int alignmask; int ret; /* test 'dst == src' case */ - ret = __test_skcipher(tfm, enc, template, tcount, false); + ret = __test_skcipher(tfm, enc, template, tcount, false, 0); if (ret) return ret; /* test 'dst != src' case */ - return __test_skcipher(tfm, enc, template, tcount, true); + ret = __test_skcipher(tfm, enc, template, tcount, true, 0); + if (ret) + return ret; + + /* test unaligned buffers, check with one byte offset */ + ret = __test_skcipher(tfm, enc, template, tcount, true, 1); + if (ret) + return ret; + + alignmask = crypto_tfm_alg_alignmask(&tfm->base); + if (alignmask) { + /* Check if alignment mask for tfm is correctly set. */ + ret = __test_skcipher(tfm, enc, template, tcount, true, + alignmask + 1); + if (ret) + return ret; + } + + return 0; } static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, @@ -1654,16 +1741,10 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "__cbc-twofish-avx", .test = alg_test_null, }, { - .alg = "__cbc-twofish-avx2", - .test = alg_test_null, - }, { .alg = "__driver-cbc-aes-aesni", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "__driver-cbc-blowfish-avx2", - .test = alg_test_null, - }, { .alg = "__driver-cbc-camellia-aesni", .test = alg_test_null, }, { @@ -1688,16 +1769,10 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "__driver-cbc-twofish-avx", .test = alg_test_null, }, { - .alg = "__driver-cbc-twofish-avx2", - .test = alg_test_null, - }, { .alg = "__driver-ecb-aes-aesni", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "__driver-ecb-blowfish-avx2", - .test = alg_test_null, - }, { .alg = "__driver-ecb-camellia-aesni", .test = alg_test_null, }, { @@ -1722,9 +1797,6 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "__driver-ecb-twofish-avx", .test = alg_test_null, }, { - .alg = "__driver-ecb-twofish-avx2", - .test = alg_test_null, - }, { .alg = "__ghash-pclmulqdqni", .test = alg_test_null, .fips_allowed = 1, @@ -1974,12 +2046,19 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { - .alg = "cryptd(__driver-cbc-aes-aesni)", - .test = alg_test_null, + .alg = "crct10dif", + .test = alg_test_hash, .fips_allowed = 1, + .suite = { + .hash = { + .vecs = crct10dif_tv_template, + .count = CRCT10DIF_TEST_VECTORS + } + } }, { - .alg = "cryptd(__driver-cbc-blowfish-avx2)", + .alg = "cryptd(__driver-cbc-aes-aesni)", .test = alg_test_null, + .fips_allowed = 1, }, { .alg = "cryptd(__driver-cbc-camellia-aesni)", .test = alg_test_null, @@ -1994,9 +2073,6 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "cryptd(__driver-ecb-blowfish-avx2)", - .test = alg_test_null, - }, { .alg = "cryptd(__driver-ecb-camellia-aesni)", .test = alg_test_null, }, { @@ -2021,9 +2097,6 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "cryptd(__driver-ecb-twofish-avx)", .test = alg_test_null, }, { - .alg = "cryptd(__driver-ecb-twofish-avx2)", - .test = alg_test_null, - }, { .alg = "cryptd(__driver-gcm-aes-aesni)", .test = alg_test_null, .fips_allowed = 1, @@ -3068,6 +3141,35 @@ static const struct alg_test_desc alg_test_descs[] = { } }; +static bool alg_test_descs_checked; + +static void alg_test_descs_check_order(void) +{ + int i; + + /* only check once */ + if (alg_test_descs_checked) + return; + + alg_test_descs_checked = true; + + for (i = 1; i < ARRAY_SIZE(alg_test_descs); i++) { + int diff = strcmp(alg_test_descs[i - 1].alg, + alg_test_descs[i].alg); + + if (WARN_ON(diff > 0)) { + pr_warn("testmgr: alg_test_descs entries in wrong order: '%s' before '%s'\n", + alg_test_descs[i - 1].alg, + alg_test_descs[i].alg); + } + + if (WARN_ON(diff == 0)) { + pr_warn("testmgr: duplicate alg_test_descs entry: '%s'\n", + alg_test_descs[i].alg); + } + } +} + static int alg_find_test(const char *alg) { int start = 0; @@ -3099,6 +3201,8 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) int j; int rc; + alg_test_descs_check_order(); + if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) { char nalg[CRYPTO_MAX_ALG_NAME]; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 1e701bc075b9..7d44aa3d6b44 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -450,6 +450,39 @@ static struct hash_testvec rmd320_tv_template[] = { } }; +#define CRCT10DIF_TEST_VECTORS 3 +static struct hash_testvec crct10dif_tv_template[] = { + { + .plaintext = "abc", + .psize = 3, +#ifdef __LITTLE_ENDIAN + .digest = "\x3b\x44", +#else + .digest = "\x44\x3b", +#endif + }, { + .plaintext = "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 79, +#ifdef __LITTLE_ENDIAN + .digest = "\x70\x4b", +#else + .digest = "\x4b\x70", +#endif + }, { + .plaintext = + "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", + .psize = 56, +#ifdef __LITTLE_ENDIAN + .digest = "\xe3\x9c", +#else + .digest = "\x9c\xe3", +#endif + .np = 2, + .tap = { 28, 28 } + } +}; + /* * SHA1 test vectors from from FIPS PUB 180-1 * Long vector from CAVS 5.0 diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index 7c73d4aca36b..bf9fc6b79328 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -108,8 +108,6 @@ static int atmel_trng_remove(struct platform_device *pdev) clk_disable(trng->clk); clk_put(trng->clk); - platform_set_drvdata(pdev, NULL); - return 0; } diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c index f343b7d0dfa1..36581ea562cb 100644 --- a/drivers/char/hw_random/bcm63xx-rng.c +++ b/drivers/char/hw_random/bcm63xx-rng.c @@ -137,7 +137,6 @@ static int bcm63xx_rng_probe(struct platform_device *pdev) out_clk_disable: clk_disable(clk); out_free_rng: - platform_set_drvdata(pdev, NULL); kfree(rng); out_free_priv: kfree(priv); @@ -154,7 +153,6 @@ static int bcm63xx_rng_remove(struct platform_device *pdev) clk_disable(priv->clk); kfree(priv); kfree(rng); - platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 20b962e1d832..f9beed54d0c8 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -700,7 +700,7 @@ static int n2rng_probe(struct platform_device *op) if (err) goto out_free_units; - dev_set_drvdata(&op->dev, np); + platform_set_drvdata(op, np); schedule_delayed_work(&np->work, 0); @@ -721,7 +721,7 @@ out: static int n2rng_remove(struct platform_device *op) { - struct n2rng *np = dev_get_drvdata(&op->dev); + struct n2rng *np = platform_get_drvdata(op); np->flags |= N2RNG_FLAG_SHUTDOWN; @@ -736,8 +736,6 @@ static int n2rng_remove(struct platform_device *op) kfree(np); - dev_set_drvdata(&op->dev, NULL); - return 0; } diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c index 96de0249e595..232b87fb5fc9 100644 --- a/drivers/char/hw_random/nomadik-rng.c +++ b/drivers/char/hw_random/nomadik-rng.c @@ -51,7 +51,7 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id) return ret; } - clk_enable(rng_clk); + clk_prepare_enable(rng_clk); ret = amba_request_regions(dev, dev->dev.init_name); if (ret) diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c index 1eada566ca70..f2885dbe1849 100644 --- a/drivers/char/hw_random/octeon-rng.c +++ b/drivers/char/hw_random/octeon-rng.c @@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev) rng->ops = ops; - dev_set_drvdata(&pdev->dev, &rng->ops); + platform_set_drvdata(pdev, &rng->ops); ret = hwrng_register(&rng->ops); if (ret) return -ENOENT; @@ -108,7 +108,7 @@ static int octeon_rng_probe(struct platform_device *pdev) static int __exit octeon_rng_remove(struct platform_device *pdev) { - struct hwrng *rng = dev_get_drvdata(&pdev->dev); + struct hwrng *rng = platform_get_drvdata(pdev); hwrng_unregister(rng); diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index d2903e772270..6843ec87b98b 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -116,7 +116,7 @@ static int omap_rng_probe(struct platform_device *pdev) }; omap_rng_ops.priv = (unsigned long)priv; - dev_set_drvdata(&pdev->dev, priv); + platform_set_drvdata(pdev, priv); priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res); @@ -124,7 +124,7 @@ static int omap_rng_probe(struct platform_device *pdev) ret = PTR_ERR(priv->base); goto err_ioremap; } - dev_set_drvdata(&pdev->dev, priv); + platform_set_drvdata(pdev, priv); pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -151,7 +151,7 @@ err_ioremap: static int __exit omap_rng_remove(struct platform_device *pdev) { - struct omap_rng_private_data *priv = dev_get_drvdata(&pdev->dev); + struct omap_rng_private_data *priv = platform_get_drvdata(pdev); hwrng_unregister(&omap_rng_ops); diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index 3e75737f5fe1..d2120ba8f3f9 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -192,7 +192,6 @@ out_release_io: out_timer: del_timer_sync(&priv->timer); out_free: - platform_set_drvdata(pdev, NULL); kfree(priv); return err; } @@ -209,7 +208,6 @@ static int timeriomem_rng_remove(struct platform_device *pdev) del_timer_sync(&priv->timer); iounmap(priv->io_base); release_mem_region(res->start, resource_size(res)); - platform_set_drvdata(pdev, NULL); kfree(priv); return 0; diff --git a/drivers/char/hw_random/tx4939-rng.c b/drivers/char/hw_random/tx4939-rng.c index d34a24a0d484..00593c847cf0 100644 --- a/drivers/char/hw_random/tx4939-rng.c +++ b/drivers/char/hw_random/tx4939-rng.c @@ -154,7 +154,6 @@ static int __exit tx4939_rng_remove(struct platform_device *dev) struct tx4939_rng *rngdev = platform_get_drvdata(dev); hwrng_unregister(&rngdev->rng); - platform_set_drvdata(dev, NULL); return 0; } diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 5871933c4e51..81465c21f873 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -87,3 +87,8 @@ config CLKSRC_SAMSUNG_PWM Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver for all devicetree enabled platforms. This driver will be needed only on systems that do not have the Exynos MCT available. + +config VF_PIT_TIMER + bool + help + Support for Period Interrupt Timer on Freescale Vybrid Family SoCs. diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 8d979c72aa94..9ba8b4d867e3 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -22,10 +22,13 @@ obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o +obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o obj-$(CONFIG_ARCH_BCM) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o +obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o +obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 766611d29945..07ea7ce900dc 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -28,8 +28,8 @@ #include <linux/of_platform.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/irq.h> #define REG_CONTROL 0x00 diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c index 77398f8c19a0..a9fd4ad25674 100644 --- a/drivers/clocksource/clksrc-dbx500-prcmu.c +++ b/drivers/clocksource/clksrc-dbx500-prcmu.c @@ -14,8 +14,7 @@ */ #include <linux/clockchips.h> #include <linux/clksrc-dbx500-prcmu.h> - -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> #define RATE_32K 32768 diff --git a/drivers/clocksource/dummy_timer.c b/drivers/clocksource/dummy_timer.c new file mode 100644 index 000000000000..1f55f9620338 --- /dev/null +++ b/drivers/clocksource/dummy_timer.c @@ -0,0 +1,69 @@ +/* + * linux/drivers/clocksource/dummy_timer.c + * + * Copyright (C) 2013 ARM Ltd. + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clockchips.h> +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/cpumask.h> + +static DEFINE_PER_CPU(struct clock_event_device, dummy_timer_evt); + +static void dummy_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + /* + * Core clockevents code will call this when exchanging timer devices. + * We don't need to do anything here. + */ +} + +static void __cpuinit dummy_timer_setup(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *evt = __this_cpu_ptr(&dummy_timer_evt); + + evt->name = "dummy_timer"; + evt->features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_DUMMY; + evt->rating = 100; + evt->set_mode = dummy_timer_set_mode; + evt->cpumask = cpumask_of(cpu); + + clockevents_register_device(evt); +} + +static int __cpuinit dummy_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING) + dummy_timer_setup(); + + return NOTIFY_OK; +} + +static struct notifier_block dummy_timer_cpu_nb __cpuinitdata = { + .notifier_call = dummy_timer_cpu_notify, +}; + +static int __init dummy_timer_register(void) +{ + int err = register_cpu_notifier(&dummy_timer_cpu_nb); + if (err) + return err; + + /* We won't get a call on the boot CPU, so register immediately */ + if (num_possible_cpus() > 1) + dummy_timer_setup(); + + return 0; +} +early_initcall(dummy_timer_register); diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c index 8c2a35f26d9b..e54ca1062d8e 100644 --- a/drivers/clocksource/dw_apb_timer.c +++ b/drivers/clocksource/dw_apb_timer.c @@ -387,15 +387,3 @@ cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs) { return (cycle_t)~apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE); } - -/** - * dw_apb_clocksource_unregister() - unregister and free a clocksource. - * - * @dw_cs: The clocksource to unregister/free. - */ -void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs) -{ - clocksource_unregister(&dw_cs->cs); - - kfree(dw_cs); -} diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index cef554432a33..4cbae4f762b1 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -21,9 +21,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/clk.h> - -#include <asm/mach/time.h> -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> static void timer_get_base_and_rate(struct device_node *np, void __iomem **base, u32 *rate) @@ -68,7 +66,7 @@ static void add_clockevent(struct device_node *event_timer) u32 irq, rate; irq = irq_of_parse_and_map(event_timer, 0); - if (irq == NO_IRQ) + if (irq == 0) panic("No IRQ for clock event timer"); timer_get_base_and_rate(event_timer, &iobase, &rate); diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c index ade7513a11d1..6722f0e2fe40 100644 --- a/drivers/clocksource/metag_generic.c +++ b/drivers/clocksource/metag_generic.c @@ -184,6 +184,8 @@ int __init metag_generic_timer_init(void) #ifdef CONFIG_METAG_META21 hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1); #endif + pr_info("Timer frequency: %u Hz\n", hwtimer_freq); + clocksource_register_hz(&clocksource_metag, hwtimer_freq); setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq); diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c index 02af4204af86..0f5e65f74dc3 100644 --- a/drivers/clocksource/mxs_timer.c +++ b/drivers/clocksource/mxs_timer.c @@ -29,9 +29,9 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/stmp_device.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> /* * There are 2 versions of the timrot on Freescale MXS-based SoCs. diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index b9415b622f55..7d2c2c56f73c 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -21,8 +21,8 @@ #include <linux/delay.h> #include <linux/err.h> #include <linux/platform_data/clocksource-nomadik-mtu.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> /* * The MTU device hosts four different counters, with 4 set of diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 0234c8d2c8f2..584b5472eea3 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -21,10 +21,10 @@ #include <linux/of_irq.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/sched_clock.h> #include <clocksource/samsung_pwm.h> -#include <asm/sched_clock.h> /* * Clocksource driver diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index ae877b021b54..93961703b887 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -26,10 +26,10 @@ #include <linux/io.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/smp_twd.h> -#include <asm/sched_clock.h> #define RTC_SECONDS 0x08 #define RTC_SHADOW_SECONDS 0x0c diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index 47a673070d70..efdca3263afe 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -27,8 +27,8 @@ #include <linux/of_address.h> #include <linux/irq.h> #include <linux/module.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/localtimer.h> #include <linux/percpu.h> /* diff --git a/drivers/clocksource/timer-marco.c b/drivers/clocksource/timer-marco.c index 97738dbf3e3b..e5dc9129ca26 100644 --- a/drivers/clocksource/timer-marco.c +++ b/drivers/clocksource/timer-marco.c @@ -17,7 +17,7 @@ #include <linux/of.h> #include <linux/of_irq.h> #include <linux/of_address.h> -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> #include <asm/localtimer.h> #include <asm/mach/time.h> diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c index 760882665d7a..ef3cfb269d8b 100644 --- a/drivers/clocksource/timer-prima2.c +++ b/drivers/clocksource/timer-prima2.c @@ -18,7 +18,7 @@ #include <linux/of.h> #include <linux/of_irq.h> #include <linux/of_address.h> -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #define SIRFSOC_TIMER_COUNTER_LO 0x0000 diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c new file mode 100644 index 000000000000..587e0202a70b --- /dev/null +++ b/drivers/clocksource/vf_pit_timer.c @@ -0,0 +1,194 @@ +/* + * Copyright 2012-2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + */ + +#include <linux/interrupt.h> +#include <linux/clockchips.h> +#include <linux/clk.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/sched_clock.h> + +/* + * Each pit takes 0x10 Bytes register space + */ +#define PITMCR 0x00 +#define PIT0_OFFSET 0x100 +#define PITn_OFFSET(n) (PIT0_OFFSET + 0x10 * (n)) +#define PITLDVAL 0x00 +#define PITCVAL 0x04 +#define PITTCTRL 0x08 +#define PITTFLG 0x0c + +#define PITMCR_MDIS (0x1 << 1) + +#define PITTCTRL_TEN (0x1 << 0) +#define PITTCTRL_TIE (0x1 << 1) +#define PITCTRL_CHN (0x1 << 2) + +#define PITTFLG_TIF 0x1 + +static void __iomem *clksrc_base; +static void __iomem *clkevt_base; +static unsigned long cycle_per_jiffy; + +static inline void pit_timer_enable(void) +{ + __raw_writel(PITTCTRL_TEN | PITTCTRL_TIE, clkevt_base + PITTCTRL); +} + +static inline void pit_timer_disable(void) +{ + __raw_writel(0, clkevt_base + PITTCTRL); +} + +static inline void pit_irq_acknowledge(void) +{ + __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG); +} + +static unsigned int pit_read_sched_clock(void) +{ + return __raw_readl(clksrc_base + PITCVAL); +} + +static int __init pit_clocksource_init(unsigned long rate) +{ + /* set the max load value and start the clock source counter */ + __raw_writel(0, clksrc_base + PITTCTRL); + __raw_writel(~0UL, clksrc_base + PITLDVAL); + __raw_writel(PITTCTRL_TEN, clksrc_base + PITTCTRL); + + setup_sched_clock(pit_read_sched_clock, 32, rate); + return clocksource_mmio_init(clksrc_base + PITCVAL, "vf-pit", rate, + 300, 32, clocksource_mmio_readl_down); +} + +static int pit_set_next_event(unsigned long delta, + struct clock_event_device *unused) +{ + /* + * set a new value to PITLDVAL register will not restart the timer, + * to abort the current cycle and start a timer period with the new + * value, the timer must be disabled and enabled again. + * and the PITLAVAL should be set to delta minus one according to pit + * hardware requirement. + */ + pit_timer_disable(); + __raw_writel(delta - 1, clkevt_base + PITLDVAL); + pit_timer_enable(); + + return 0; +} + +static void pit_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + pit_set_next_event(cycle_per_jiffy, evt); + break; + default: + break; + } +} + +static irqreturn_t pit_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + pit_irq_acknowledge(); + + /* + * pit hardware doesn't support oneshot, it will generate an interrupt + * and reload the counter value from PITLDVAL when PITCVAL reach zero, + * and start the counter again. So software need to disable the timer + * to stop the counter loop in ONESHOT mode. + */ + if (likely(evt->mode == CLOCK_EVT_MODE_ONESHOT)) + pit_timer_disable(); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct clock_event_device clockevent_pit = { + .name = "VF pit timer", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = pit_set_mode, + .set_next_event = pit_set_next_event, + .rating = 300, +}; + +static struct irqaction pit_timer_irq = { + .name = "VF pit timer", + .flags = IRQF_TIMER | IRQF_IRQPOLL, + .handler = pit_timer_interrupt, + .dev_id = &clockevent_pit, +}; + +static int __init pit_clockevent_init(unsigned long rate, int irq) +{ + __raw_writel(0, clkevt_base + PITTCTRL); + __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG); + + BUG_ON(setup_irq(irq, &pit_timer_irq)); + + clockevent_pit.cpumask = cpumask_of(0); + clockevent_pit.irq = irq; + /* + * The value for the LDVAL register trigger is calculated as: + * LDVAL trigger = (period / clock period) - 1 + * The pit is a 32-bit down count timer, when the conter value + * reaches 0, it will generate an interrupt, thus the minimal + * LDVAL trigger value is 1. And then the min_delta is + * minimal LDVAL trigger value + 1, and the max_delta is full 32-bit. + */ + clockevents_config_and_register(&clockevent_pit, rate, 2, 0xffffffff); + + return 0; +} + +static void __init pit_timer_init(struct device_node *np) +{ + struct clk *pit_clk; + void __iomem *timer_base; + unsigned long clk_rate; + int irq; + + timer_base = of_iomap(np, 0); + BUG_ON(!timer_base); + + /* + * PIT0 and PIT1 can be chained to build a 64-bit timer, + * so choose PIT2 as clocksource, PIT3 as clockevent device, + * and leave PIT0 and PIT1 unused for anyone else who needs them. + */ + clksrc_base = timer_base + PITn_OFFSET(2); + clkevt_base = timer_base + PITn_OFFSET(3); + + irq = irq_of_parse_and_map(np, 0); + BUG_ON(irq <= 0); + + pit_clk = of_clk_get(np, 0); + BUG_ON(IS_ERR(pit_clk)); + + BUG_ON(clk_prepare_enable(pit_clk)); + + clk_rate = clk_get_rate(pit_clk); + cycle_per_jiffy = clk_rate / (HZ); + + /* enable the pit module */ + __raw_writel(~PITMCR_MDIS, timer_base + PITMCR); + + BUG_ON(pit_clocksource_init(clk_rate)); + + pit_clockevent_init(clk_rate, irq); +} +CLOCKSOURCE_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init); diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c new file mode 100644 index 000000000000..ca81809d159d --- /dev/null +++ b/drivers/clocksource/zevio-timer.c @@ -0,0 +1,215 @@ +/* + * linux/drivers/clocksource/zevio-timer.c + * + * Copyright (C) 2013 Daniel Tang <tangrs@tangrs.id.au> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + */ + +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/clk.h> +#include <linux/clockchips.h> +#include <linux/cpumask.h> +#include <linux/interrupt.h> +#include <linux/slab.h> + +#define IO_CURRENT_VAL 0x00 +#define IO_DIVIDER 0x04 +#define IO_CONTROL 0x08 + +#define IO_TIMER1 0x00 +#define IO_TIMER2 0x0C + +#define IO_MATCH_BEGIN 0x18 +#define IO_MATCH(x) (IO_MATCH_BEGIN + ((x) << 2)) + +#define IO_INTR_STS 0x00 +#define IO_INTR_ACK 0x00 +#define IO_INTR_MSK 0x04 + +#define CNTL_STOP_TIMER (1 << 4) +#define CNTL_RUN_TIMER (0 << 4) + +#define CNTL_INC (1 << 3) +#define CNTL_DEC (0 << 3) + +#define CNTL_TOZERO 0 +#define CNTL_MATCH(x) ((x) + 1) +#define CNTL_FOREVER 7 + +/* There are 6 match registers but we only use one. */ +#define TIMER_MATCH 0 + +#define TIMER_INTR_MSK (1 << (TIMER_MATCH)) +#define TIMER_INTR_ALL 0x3F + +struct zevio_timer { + void __iomem *base; + void __iomem *timer1, *timer2; + void __iomem *interrupt_regs; + + struct clk *clk; + struct clock_event_device clkevt; + struct irqaction clkevt_irq; + + char clocksource_name[64]; + char clockevent_name[64]; +}; + +static int zevio_timer_set_event(unsigned long delta, + struct clock_event_device *dev) +{ + struct zevio_timer *timer = container_of(dev, struct zevio_timer, + clkevt); + + writel(delta, timer->timer1 + IO_CURRENT_VAL); + writel(CNTL_RUN_TIMER | CNTL_DEC | CNTL_MATCH(TIMER_MATCH), + timer->timer1 + IO_CONTROL); + + return 0; +} + +static void zevio_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *dev) +{ + struct zevio_timer *timer = container_of(dev, struct zevio_timer, + clkevt); + + switch (mode) { + case CLOCK_EVT_MODE_RESUME: + case CLOCK_EVT_MODE_ONESHOT: + /* Enable timer interrupts */ + writel(TIMER_INTR_MSK, timer->interrupt_regs + IO_INTR_MSK); + writel(TIMER_INTR_ALL, timer->interrupt_regs + IO_INTR_ACK); + break; + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_UNUSED: + /* Disable timer interrupts */ + writel(0, timer->interrupt_regs + IO_INTR_MSK); + writel(TIMER_INTR_ALL, timer->interrupt_regs + IO_INTR_ACK); + /* Stop timer */ + writel(CNTL_STOP_TIMER, timer->timer1 + IO_CONTROL); + break; + case CLOCK_EVT_MODE_PERIODIC: + default: + /* Unsupported */ + break; + } +} + +static irqreturn_t zevio_timer_interrupt(int irq, void *dev_id) +{ + struct zevio_timer *timer = dev_id; + u32 intr; + + intr = readl(timer->interrupt_regs + IO_INTR_ACK); + if (!(intr & TIMER_INTR_MSK)) + return IRQ_NONE; + + writel(TIMER_INTR_MSK, timer->interrupt_regs + IO_INTR_ACK); + writel(CNTL_STOP_TIMER, timer->timer1 + IO_CONTROL); + + if (timer->clkevt.event_handler) + timer->clkevt.event_handler(&timer->clkevt); + + return IRQ_HANDLED; +} + +static int __init zevio_timer_add(struct device_node *node) +{ + struct zevio_timer *timer; + struct resource res; + int irqnr, ret; + + timer = kzalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) + return -ENOMEM; + + timer->base = of_iomap(node, 0); + if (!timer->base) { + ret = -EINVAL; + goto error_free; + } + timer->timer1 = timer->base + IO_TIMER1; + timer->timer2 = timer->base + IO_TIMER2; + + timer->clk = of_clk_get(node, 0); + if (IS_ERR(timer->clk)) { + ret = PTR_ERR(timer->clk); + pr_err("Timer clock not found! (error %d)\n", ret); + goto error_unmap; + } + + timer->interrupt_regs = of_iomap(node, 1); + irqnr = irq_of_parse_and_map(node, 0); + + of_address_to_resource(node, 0, &res); + scnprintf(timer->clocksource_name, sizeof(timer->clocksource_name), + "%llx.%s_clocksource", + (unsigned long long)res.start, node->name); + + scnprintf(timer->clockevent_name, sizeof(timer->clockevent_name), + "%llx.%s_clockevent", + (unsigned long long)res.start, node->name); + + if (timer->interrupt_regs && irqnr) { + timer->clkevt.name = timer->clockevent_name; + timer->clkevt.set_next_event = zevio_timer_set_event; + timer->clkevt.set_mode = zevio_timer_set_mode; + timer->clkevt.rating = 200; + timer->clkevt.cpumask = cpu_all_mask; + timer->clkevt.features = CLOCK_EVT_FEAT_ONESHOT; + timer->clkevt.irq = irqnr; + + writel(CNTL_STOP_TIMER, timer->timer1 + IO_CONTROL); + writel(0, timer->timer1 + IO_DIVIDER); + + /* Start with timer interrupts disabled */ + writel(0, timer->interrupt_regs + IO_INTR_MSK); + writel(TIMER_INTR_ALL, timer->interrupt_regs + IO_INTR_ACK); + + /* Interrupt to occur when timer value matches 0 */ + writel(0, timer->base + IO_MATCH(TIMER_MATCH)); + + timer->clkevt_irq.name = timer->clockevent_name; + timer->clkevt_irq.handler = zevio_timer_interrupt; + timer->clkevt_irq.dev_id = timer; + timer->clkevt_irq.flags = IRQF_TIMER | IRQF_IRQPOLL; + + setup_irq(irqnr, &timer->clkevt_irq); + + clockevents_config_and_register(&timer->clkevt, + clk_get_rate(timer->clk), 0x0001, 0xffff); + pr_info("Added %s as clockevent\n", timer->clockevent_name); + } + + writel(CNTL_STOP_TIMER, timer->timer2 + IO_CONTROL); + writel(0, timer->timer2 + IO_CURRENT_VAL); + writel(0, timer->timer2 + IO_DIVIDER); + writel(CNTL_RUN_TIMER | CNTL_FOREVER | CNTL_INC, + timer->timer2 + IO_CONTROL); + + clocksource_mmio_init(timer->timer2 + IO_CURRENT_VAL, + timer->clocksource_name, + clk_get_rate(timer->clk), + 200, 16, + clocksource_mmio_readw_up); + + pr_info("Added %s as clocksource\n", timer->clocksource_name); + + return 0; +error_unmap: + iounmap(timer->base); +error_free: + kfree(timer); + return ret; +} + +CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_add); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index dffb85525368..8ff7c230d82e 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -278,7 +278,7 @@ config CRYPTO_DEV_PICOXCELL config CRYPTO_DEV_SAHARA tristate "Support for SAHARA crypto accelerator" - depends on ARCH_MXC && EXPERIMENTAL && OF + depends on ARCH_MXC && OF select CRYPTO_BLKCIPHER select CRYPTO_AES select CRYPTO_ECB @@ -286,6 +286,16 @@ config CRYPTO_DEV_SAHARA This option enables support for the SAHARA HW crypto accelerator found in some Freescale i.MX chips. +config CRYPTO_DEV_DCP + tristate "Support for the DCP engine" + depends on ARCH_MXS && OF + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_CBC + help + This options enables support for the hardware crypto-acceleration + capabilities of the DCP co-processor + config CRYPTO_DEV_S5P tristate "Support for Samsung S5PV210 crypto accelerator" depends on ARCH_S5PV210 diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 38ce13d3b79b..b4946ddd2550 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o +obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 6e94bcd94678..f5d6deced1cb 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -202,6 +202,7 @@ static int caam_probe(struct platform_device *pdev) #ifdef CONFIG_DEBUG_FS struct caam_perfmon *perfmon; #endif + u64 cha_vid; ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL); if (!ctrlpriv) @@ -293,11 +294,14 @@ static int caam_probe(struct platform_device *pdev) return -ENOMEM; } + cha_vid = rd_reg64(&topregs->ctrl.perfmon.cha_id); + /* - * RNG4 based SECs (v5+) need special initialization prior - * to executing any descriptors + * If SEC has RNG version >= 4 and RNG state handle has not been + * already instantiated ,do RNG instantiation */ - if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) { + if ((cha_vid & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT >= 4 && + !(rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & RDSTA_IF0)) { kick_trng(pdev); ret = instantiate_rng(ctrlpriv->jrdev[0]); if (ret) { diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index f7f833be8c67..53b296f78b0d 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -231,7 +231,12 @@ struct sec4_sg_entry { #define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_CLASS_CTX (0x20 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_DESCBUF_JOB (0x41 << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT) /* Offset in source/destination */ @@ -366,6 +371,7 @@ struct sec4_sg_entry { #define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT) #define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT) #define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT) +#define FIFOLD_TYPE_NOINFOFIFO (0x0F << FIFOLD_TYPE_SHIFT) #define FIFOLDST_LEN_MASK 0xffff #define FIFOLDST_EXT_LEN_MASK 0xffffffff @@ -1294,10 +1300,10 @@ struct sec4_sg_entry { #define SQOUT_SGF 0x01000000 /* Appends to a previous pointer */ -#define SQOUT_PRE 0x00800000 +#define SQOUT_PRE SQIN_PRE /* Restore sequence with pointer/length */ -#define SQOUT_RTO 0x00200000 +#define SQOUT_RTO SQIN_RTO /* Use extended length following pointer */ #define SQOUT_EXT 0x00400000 @@ -1359,6 +1365,7 @@ struct sec4_sg_entry { #define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT) #define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT) #define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT) +#define MOVE_DEST_INFIFO_NOINFO (0x0a << MOVE_DEST_SHIFT) #define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT) #define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT) #define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT) @@ -1411,6 +1418,7 @@ struct sec4_sg_entry { #define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT) #define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT) #define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT) +#define MATH_SRC0_DPOVRD (0x07 << MATH_SRC0_SHIFT) #define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT) #define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT) #define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT) @@ -1425,6 +1433,7 @@ struct sec4_sg_entry { #define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT) #define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT) #define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT) +#define MATH_SRC1_DPOVRD (0x07 << MATH_SRC0_SHIFT) #define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT) #define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT) #define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT) @@ -1600,4 +1609,13 @@ struct sec4_sg_entry { #define NFIFOENTRY_PLEN_SHIFT 0 #define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT) +/* Append Load Immediate Command */ +#define FD_CMD_APPEND_LOAD_IMMEDIATE 0x80000000 + +/* Set SEQ LIODN equal to the Non-SEQ LIODN for the job */ +#define FD_CMD_SET_SEQ_LIODN_EQUAL_NONSEQ_LIODN 0x40000000 + +/* Frame Descriptor Command for Replacement Job Descriptor */ +#define FD_CMD_REPLACE_JOB_DESC 0x20000000 + #endif /* DESC_H */ diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index c85c1f058401..fe3bfd1b08ca 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -110,6 +110,26 @@ static inline void append_cmd(u32 *desc, u32 command) (*desc)++; } +#define append_u32 append_cmd + +static inline void append_u64(u32 *desc, u64 data) +{ + u32 *offset = desc_end(desc); + + *offset = upper_32_bits(data); + *(++offset) = lower_32_bits(data); + + (*desc) += 2; +} + +/* Write command without affecting header, and return pointer to next word */ +static inline u32 *write_cmd(u32 *desc, u32 command) +{ + *desc = command; + + return desc + 1; +} + static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, u32 command) { @@ -122,7 +142,8 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, unsigned int len, u32 command) { append_cmd(desc, command); - append_ptr(desc, ptr); + if (!(command & (SQIN_RTO | SQIN_PRE))) + append_ptr(desc, ptr); append_cmd(desc, len); } @@ -176,17 +197,36 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \ } APPEND_CMD_PTR(key, KEY) APPEND_CMD_PTR(load, LOAD) -APPEND_CMD_PTR(store, STORE) APPEND_CMD_PTR(fifo_load, FIFO_LOAD) APPEND_CMD_PTR(fifo_store, FIFO_STORE) +static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, + u32 options) +{ + u32 cmd_src; + + cmd_src = options & LDST_SRCDST_MASK; + + append_cmd(desc, CMD_STORE | options | len); + + /* The following options do not require pointer */ + if (!(cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED || + cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB || + cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB_WE || + cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED_WE)) + append_ptr(desc, ptr); +} + #define APPEND_SEQ_PTR_INTLEN(cmd, op) \ static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ unsigned int len, \ u32 options) \ { \ PRINT_POS; \ - append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ + if (options & (SQIN_RTO | SQIN_PRE)) \ + append_cmd(desc, CMD_SEQ_##op##_PTR | len | options); \ + else \ + append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ } APPEND_SEQ_PTR_INTLEN(in, IN) APPEND_SEQ_PTR_INTLEN(out, OUT) @@ -259,7 +299,7 @@ APPEND_CMD_RAW_IMM(load, LOAD, u32); */ #define APPEND_MATH(op, desc, dest, src_0, src_1, len) \ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ - MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32) (len & MATH_LEN_MASK)); + MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32)len); #define append_math_add(desc, dest, src0, src1, len) \ APPEND_MATH(ADD, desc, dest, src0, src1, len) @@ -279,6 +319,8 @@ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ APPEND_MATH(LSHIFT, desc, dest, src0, src1, len) #define append_math_rshift(desc, dest, src0, src1, len) \ APPEND_MATH(RSHIFT, desc, dest, src0, src1, len) +#define append_math_ldshift(desc, dest, src0, src1, len) \ + APPEND_MATH(SHLD, desc, dest, src0, src1, len) /* Exactly one source is IMM. Data is passed in as u32 value */ #define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \ @@ -305,3 +347,34 @@ do { \ APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data) #define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \ APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data) + +/* Exactly one source is IMM. Data is passed in as u64 value */ +#define APPEND_MATH_IMM_u64(op, desc, dest, src_0, src_1, data) \ +do { \ + u32 upper = (data >> 16) >> 16; \ + APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ * 2 | \ + (upper ? 0 : MATH_IFB)); \ + if (upper) \ + append_u64(desc, data); \ + else \ + append_u32(desc, data); \ +} while (0) + +#define append_math_add_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(ADD, desc, dest, src0, src1, data) +#define append_math_sub_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(SUB, desc, dest, src0, src1, data) +#define append_math_add_c_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(ADDC, desc, dest, src0, src1, data) +#define append_math_sub_b_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(SUBB, desc, dest, src0, src1, data) +#define append_math_and_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(AND, desc, dest, src0, src1, data) +#define append_math_or_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(OR, desc, dest, src0, src1, data) +#define append_math_xor_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(XOR, desc, dest, src0, src1, data) +#define append_math_lshift_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data) +#define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data) diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 62950d22ac13..3a87c0cf879a 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -44,6 +44,7 @@ #define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ #define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */ #define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */ +#define PDBOPTS_ESP_AOFL 0x04 /* adjust out frame len (decap, SEC>=5.3)*/ #define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */ #define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */ #define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */ diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index cd6fedad9935..c09142fc13e3 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -117,6 +117,43 @@ struct jr_outentry { #define CHA_NUM_DECONUM_SHIFT 56 #define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT) +/* CHA Version IDs */ +#define CHA_ID_AES_SHIFT 0 +#define CHA_ID_AES_MASK (0xfull << CHA_ID_AES_SHIFT) + +#define CHA_ID_DES_SHIFT 4 +#define CHA_ID_DES_MASK (0xfull << CHA_ID_DES_SHIFT) + +#define CHA_ID_ARC4_SHIFT 8 +#define CHA_ID_ARC4_MASK (0xfull << CHA_ID_ARC4_SHIFT) + +#define CHA_ID_MD_SHIFT 12 +#define CHA_ID_MD_MASK (0xfull << CHA_ID_MD_SHIFT) + +#define CHA_ID_RNG_SHIFT 16 +#define CHA_ID_RNG_MASK (0xfull << CHA_ID_RNG_SHIFT) + +#define CHA_ID_SNW8_SHIFT 20 +#define CHA_ID_SNW8_MASK (0xfull << CHA_ID_SNW8_SHIFT) + +#define CHA_ID_KAS_SHIFT 24 +#define CHA_ID_KAS_MASK (0xfull << CHA_ID_KAS_SHIFT) + +#define CHA_ID_PK_SHIFT 28 +#define CHA_ID_PK_MASK (0xfull << CHA_ID_PK_SHIFT) + +#define CHA_ID_CRC_SHIFT 32 +#define CHA_ID_CRC_MASK (0xfull << CHA_ID_CRC_SHIFT) + +#define CHA_ID_SNW9_SHIFT 36 +#define CHA_ID_SNW9_MASK (0xfull << CHA_ID_SNW9_SHIFT) + +#define CHA_ID_DECO_SHIFT 56 +#define CHA_ID_DECO_MASK (0xfull << CHA_ID_DECO_SHIFT) + +#define CHA_ID_JR_SHIFT 60 +#define CHA_ID_JR_MASK (0xfull << CHA_ID_JR_SHIFT) + struct sec_vid { u16 ip_id; u8 maj_rev; @@ -228,7 +265,10 @@ struct rng4tst { u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ u32 rtfrqcnt; /* PRGM=0: freq. count register */ }; - u32 rsvd1[56]; + u32 rsvd1[40]; +#define RDSTA_IF0 0x00000001 + u32 rdsta; + u32 rsvd2[15]; }; /* diff --git a/drivers/crypto/dcp.c b/drivers/crypto/dcp.c new file mode 100644 index 000000000000..a8a7dd4b0d25 --- /dev/null +++ b/drivers/crypto/dcp.c @@ -0,0 +1,912 @@ +/* + * Cryptographic API. + * + * Support for DCP cryptographic accelerator. + * + * Copyright (c) 2013 + * Author: Tobias Rauter <tobias.rauter@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/interrupt.h> +#include <linux/completion.h> +#include <linux/workqueue.h> +#include <linux/delay.h> +#include <linux/crypto.h> +#include <linux/miscdevice.h> + +#include <crypto/scatterwalk.h> +#include <crypto/aes.h> + + +/* IOCTL for DCP OTP Key AES - taken from Freescale's SDK*/ +#define DBS_IOCTL_BASE 'd' +#define DBS_ENC _IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16]) +#define DBS_DEC _IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16]) + +/* DCP channel used for AES */ +#define USED_CHANNEL 1 +/* Ring Buffers' maximum size */ +#define DCP_MAX_PKG 20 + +/* Control Register */ +#define DCP_REG_CTRL 0x000 +#define DCP_CTRL_SFRST (1<<31) +#define DCP_CTRL_CLKGATE (1<<30) +#define DCP_CTRL_CRYPTO_PRESENT (1<<29) +#define DCP_CTRL_SHA_PRESENT (1<<28) +#define DCP_CTRL_GATHER_RES_WRITE (1<<23) +#define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22) +#define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21) +#define DCP_CTRL_CH_IRQ_E_0 0x01 +#define DCP_CTRL_CH_IRQ_E_1 0x02 +#define DCP_CTRL_CH_IRQ_E_2 0x04 +#define DCP_CTRL_CH_IRQ_E_3 0x08 + +/* Status register */ +#define DCP_REG_STAT 0x010 +#define DCP_STAT_OTP_KEY_READY (1<<28) +#define DCP_STAT_CUR_CHANNEL(stat) ((stat>>24)&0x0F) +#define DCP_STAT_READY_CHANNEL(stat) ((stat>>16)&0x0F) +#define DCP_STAT_IRQ(stat) (stat&0x0F) +#define DCP_STAT_CHAN_0 (0x01) +#define DCP_STAT_CHAN_1 (0x02) +#define DCP_STAT_CHAN_2 (0x04) +#define DCP_STAT_CHAN_3 (0x08) + +/* Channel Control Register */ +#define DCP_REG_CHAN_CTRL 0x020 +#define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16) +#define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100) +#define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200) +#define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400) +#define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800) +#define DCP_CHAN_CTRL_ENABLE_0 (0x01) +#define DCP_CHAN_CTRL_ENABLE_1 (0x02) +#define DCP_CHAN_CTRL_ENABLE_2 (0x04) +#define DCP_CHAN_CTRL_ENABLE_3 (0x08) + +/* + * Channel Registers: + * The DCP has 4 channels. Each of this channels + * has 4 registers (command pointer, semaphore, status and options). + * The address of register REG of channel CHAN is obtained by + * dcp_chan_reg(REG, CHAN) + */ +#define DCP_REG_CHAN_PTR 0x00000100 +#define DCP_REG_CHAN_SEMA 0x00000110 +#define DCP_REG_CHAN_STAT 0x00000120 +#define DCP_REG_CHAN_OPT 0x00000130 + +#define DCP_CHAN_STAT_NEXT_CHAIN_IS_0 0x010000 +#define DCP_CHAN_STAT_NO_CHAIN 0x020000 +#define DCP_CHAN_STAT_CONTEXT_ERROR 0x030000 +#define DCP_CHAN_STAT_PAYLOAD_ERROR 0x040000 +#define DCP_CHAN_STAT_INVALID_MODE 0x050000 +#define DCP_CHAN_STAT_PAGEFAULT 0x40 +#define DCP_CHAN_STAT_DST 0x20 +#define DCP_CHAN_STAT_SRC 0x10 +#define DCP_CHAN_STAT_PACKET 0x08 +#define DCP_CHAN_STAT_SETUP 0x04 +#define DCP_CHAN_STAT_MISMATCH 0x02 + +/* hw packet control*/ + +#define DCP_PKT_PAYLOAD_KEY (1<<11) +#define DCP_PKT_OTP_KEY (1<<10) +#define DCP_PKT_CIPHER_INIT (1<<9) +#define DCP_PKG_CIPHER_ENCRYPT (1<<8) +#define DCP_PKT_CIPHER_ENABLE (1<<5) +#define DCP_PKT_DECR_SEM (1<<1) +#define DCP_PKT_CHAIN (1<<2) +#define DCP_PKT_IRQ 1 + +#define DCP_PKT_MODE_CBC (1<<4) +#define DCP_PKT_KEYSELECT_OTP (0xFF<<8) + +/* cipher flags */ +#define DCP_ENC 0x0001 +#define DCP_DEC 0x0002 +#define DCP_ECB 0x0004 +#define DCP_CBC 0x0008 +#define DCP_CBC_INIT 0x0010 +#define DCP_NEW_KEY 0x0040 +#define DCP_OTP_KEY 0x0080 +#define DCP_AES 0x1000 + +/* DCP Flags */ +#define DCP_FLAG_BUSY 0x01 +#define DCP_FLAG_PRODUCING 0x02 + +/* clock defines */ +#define CLOCK_ON 1 +#define CLOCK_OFF 0 + +struct dcp_dev_req_ctx { + int mode; +}; + +struct dcp_op { + unsigned int flags; + u8 key[AES_KEYSIZE_128]; + int keylen; + + struct ablkcipher_request *req; + struct crypto_ablkcipher *fallback; + + uint32_t stat; + uint32_t pkt1; + uint32_t pkt2; + struct ablkcipher_walk walk; +}; + +struct dcp_dev { + struct device *dev; + void __iomem *dcp_regs_base; + + int dcp_vmi_irq; + int dcp_irq; + + spinlock_t queue_lock; + struct crypto_queue queue; + + uint32_t pkt_produced; + uint32_t pkt_consumed; + + struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG]; + dma_addr_t hw_phys_pkg; + + /* [KEY][IV] Both with 16 Bytes */ + u8 *payload_base; + dma_addr_t payload_base_dma; + + + struct tasklet_struct done_task; + struct tasklet_struct queue_task; + struct timer_list watchdog; + + unsigned long flags; + + struct dcp_op *ctx; + + struct miscdevice dcp_bootstream_misc; +}; + +struct dcp_hw_packet { + uint32_t next; + uint32_t pkt1; + uint32_t pkt2; + uint32_t src; + uint32_t dst; + uint32_t size; + uint32_t payload; + uint32_t stat; +}; + +static struct dcp_dev *global_dev; + +static inline u32 dcp_chan_reg(u32 reg, int chan) +{ + return reg + (chan) * 0x40; +} + +static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg) +{ + writel(data, dev->dcp_regs_base + reg); +} + +static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg) +{ + writel(data, dev->dcp_regs_base + (reg | 0x04)); +} + +static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg) +{ + writel(data, dev->dcp_regs_base + (reg | 0x08)); +} + +static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg) +{ + writel(data, dev->dcp_regs_base + (reg | 0x0C)); +} + +static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg) +{ + return readl(dev->dcp_regs_base + reg); +} + +static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt) +{ + dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE); + dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE); + dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt); +} + +static int dcp_dma_map(struct dcp_dev *dev, + struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt) +{ + dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt); + /* align to length = 16 */ + pkt->size = walk->nbytes - (walk->nbytes % 16); + + pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset, + pkt->size, DMA_TO_DEVICE); + + if (pkt->src == 0) { + dev_err(dev->dev, "Unable to map src"); + return -ENOMEM; + } + + pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset, + pkt->size, DMA_FROM_DEVICE); + + if (pkt->dst == 0) { + dev_err(dev->dev, "Unable to map dst"); + dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE); + return -ENOMEM; + } + + return 0; +} + +static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt, + uint8_t last) +{ + struct dcp_op *ctx = dev->ctx; + pkt->pkt1 = ctx->pkt1; + pkt->pkt2 = ctx->pkt2; + + pkt->payload = (u32) dev->payload_base_dma; + pkt->stat = 0; + + if (ctx->flags & DCP_CBC_INIT) { + pkt->pkt1 |= DCP_PKT_CIPHER_INIT; + ctx->flags &= ~DCP_CBC_INIT; + } + + mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500)); + pkt->pkt1 |= DCP_PKT_IRQ; + if (!last) + pkt->pkt1 |= DCP_PKT_CHAIN; + + dev->pkt_produced++; + + dcp_write(dev, 1, + dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL)); +} + +static void dcp_op_proceed(struct dcp_dev *dev) +{ + struct dcp_op *ctx = dev->ctx; + struct dcp_hw_packet *pkt; + + while (ctx->walk.nbytes) { + int err = 0; + + pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG]; + err = dcp_dma_map(dev, &ctx->walk, pkt); + if (err) { + dev->ctx->stat |= err; + /* start timer to wait for already set up calls */ + mod_timer(&dev->watchdog, + jiffies + msecs_to_jiffies(500)); + break; + } + + + err = ctx->walk.nbytes - pkt->size; + ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err); + + dcp_op_one(dev, pkt, ctx->walk.nbytes == 0); + /* we have to wait if no space is left in buffer */ + if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG) + break; + } + clear_bit(DCP_FLAG_PRODUCING, &dev->flags); +} + +static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk) +{ + struct dcp_op *ctx = dev->ctx; + + if (ctx->flags & DCP_NEW_KEY) { + memcpy(dev->payload_base, ctx->key, ctx->keylen); + ctx->flags &= ~DCP_NEW_KEY; + } + + ctx->pkt1 = 0; + ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE; + ctx->pkt1 |= DCP_PKT_DECR_SEM; + + if (ctx->flags & DCP_OTP_KEY) + ctx->pkt1 |= DCP_PKT_OTP_KEY; + else + ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY; + + if (ctx->flags & DCP_ENC) + ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT; + + ctx->pkt2 = 0; + if (ctx->flags & DCP_CBC) + ctx->pkt2 |= DCP_PKT_MODE_CBC; + + dev->pkt_produced = 0; + dev->pkt_consumed = 0; + + ctx->stat = 0; + dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); + dcp_write(dev, (u32) dev->hw_phys_pkg, + dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL)); + + set_bit(DCP_FLAG_PRODUCING, &dev->flags); + + if (use_walk) { + ablkcipher_walk_init(&ctx->walk, ctx->req->dst, + ctx->req->src, ctx->req->nbytes); + ablkcipher_walk_phys(ctx->req, &ctx->walk); + dcp_op_proceed(dev); + } else { + dcp_op_one(dev, dev->hw_pkg[0], 1); + clear_bit(DCP_FLAG_PRODUCING, &dev->flags); + } +} + +static void dcp_done_task(unsigned long data) +{ + struct dcp_dev *dev = (struct dcp_dev *)data; + struct dcp_hw_packet *last_packet; + int fin; + fin = 0; + + for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG]; + last_packet->stat == 1; + last_packet = + dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) { + + dcp_dma_unmap(dev, last_packet); + last_packet->stat = 0; + fin++; + } + /* the last call of this function already consumed this IRQ's packet */ + if (fin == 0) + return; + + dev_dbg(dev->dev, + "Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d", + dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed); + + last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG]; + if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) { + if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags)) + dcp_op_proceed(dev); + return; + } + + while (unlikely(dev->pkt_consumed < dev->pkt_produced)) { + dcp_dma_unmap(dev, + dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]); + } + + if (dev->ctx->flags & DCP_OTP_KEY) { + /* we used the miscdevice, no walk to finish */ + clear_bit(DCP_FLAG_BUSY, &dev->flags); + return; + } + + ablkcipher_walk_complete(&dev->ctx->walk); + dev->ctx->req->base.complete(&dev->ctx->req->base, + dev->ctx->stat); + dev->ctx->req = NULL; + /* in case there are other requests in the queue */ + tasklet_schedule(&dev->queue_task); +} + +static void dcp_watchdog(unsigned long data) +{ + struct dcp_dev *dev = (struct dcp_dev *)data; + dev->ctx->stat |= dcp_read(dev, + dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); + + dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat); + + if (!dev->ctx->stat) + dev->ctx->stat = -ETIMEDOUT; + + dcp_done_task(data); +} + + +static irqreturn_t dcp_common_irq(int irq, void *context) +{ + u32 msk; + struct dcp_dev *dev = (struct dcp_dev *) context; + + del_timer(&dev->watchdog); + + msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT)); + dcp_clear(dev, msk, DCP_REG_STAT); + if (msk == 0) + return IRQ_NONE; + + dev->ctx->stat |= dcp_read(dev, + dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); + + if (msk & DCP_STAT_CHAN_1) + tasklet_schedule(&dev->done_task); + + return IRQ_HANDLED; +} + +static irqreturn_t dcp_vmi_irq(int irq, void *context) +{ + return dcp_common_irq(irq, context); +} + +static irqreturn_t dcp_irq(int irq, void *context) +{ + return dcp_common_irq(irq, context); +} + +static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx) +{ + dev->ctx = ctx; + + if ((ctx->flags & DCP_CBC) && ctx->req->info) { + ctx->flags |= DCP_CBC_INIT; + memcpy(dev->payload_base + AES_KEYSIZE_128, + ctx->req->info, AES_KEYSIZE_128); + } + + dcp_op_start(dev, 1); +} + +static void dcp_queue_task(unsigned long data) +{ + struct dcp_dev *dev = (struct dcp_dev *) data; + struct crypto_async_request *async_req, *backlog; + struct crypto_ablkcipher *tfm; + struct dcp_op *ctx; + struct dcp_dev_req_ctx *rctx; + struct ablkcipher_request *req; + unsigned long flags; + + spin_lock_irqsave(&dev->queue_lock, flags); + + backlog = crypto_get_backlog(&dev->queue); + async_req = crypto_dequeue_request(&dev->queue); + + spin_unlock_irqrestore(&dev->queue_lock, flags); + + if (!async_req) + goto ret_nothing_done; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ablkcipher_request_cast(async_req); + tfm = crypto_ablkcipher_reqtfm(req); + rctx = ablkcipher_request_ctx(req); + ctx = crypto_ablkcipher_ctx(tfm); + + if (!req->src || !req->dst) + goto ret_nothing_done; + + ctx->flags |= rctx->mode; + ctx->req = req; + + dcp_crypt(dev, ctx); + + return; + +ret_nothing_done: + clear_bit(DCP_FLAG_BUSY, &dev->flags); +} + + +static int dcp_cra_init(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct dcp_op *ctx = crypto_tfm_ctx(tfm); + + tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx); + + ctx->fallback = crypto_alloc_ablkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback)) { + dev_err(global_dev->dev, "Error allocating fallback algo %s\n", + name); + return PTR_ERR(ctx->fallback); + } + + return 0; +} + +static void dcp_cra_exit(struct crypto_tfm *tfm) +{ + struct dcp_op *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) + crypto_free_ablkcipher(ctx->fallback); + + ctx->fallback = NULL; +} + +/* async interface */ +static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int len) +{ + struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm); + unsigned int ret = 0; + ctx->keylen = len; + ctx->flags = 0; + if (len == AES_KEYSIZE_128) { + if (memcmp(ctx->key, key, AES_KEYSIZE_128)) { + memcpy(ctx->key, key, len); + ctx->flags |= DCP_NEW_KEY; + } + return 0; + } + + ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + ctx->fallback->base.crt_flags |= + (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + + ret = crypto_ablkcipher_setkey(ctx->fallback, key, len); + if (ret) { + struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); + + tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm_aux->crt_flags |= + (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode) +{ + struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req); + struct dcp_dev *dev = global_dev; + unsigned long flags; + int err = 0; + + if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) + return -EINVAL; + + rctx->mode = mode; + + spin_lock_irqsave(&dev->queue_lock, flags); + err = ablkcipher_enqueue_request(&dev->queue, req); + spin_unlock_irqrestore(&dev->queue_lock, flags); + + flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags); + + if (!(flags & DCP_FLAG_BUSY)) + tasklet_schedule(&dev->queue_task); + + return err; +} + +static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req) +{ + struct crypto_tfm *tfm = + crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); + struct dcp_op *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + + if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { + int err = 0; + ablkcipher_request_set_tfm(req, ctx->fallback); + err = crypto_ablkcipher_encrypt(req); + ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + return err; + } + + return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC); +} + +static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req) +{ + struct crypto_tfm *tfm = + crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); + struct dcp_op *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + + if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { + int err = 0; + ablkcipher_request_set_tfm(req, ctx->fallback); + err = crypto_ablkcipher_decrypt(req); + ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + return err; + } + return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC); +} + +static struct crypto_alg algs[] = { + { + .cra_name = "cbc(aes)", + .cra_driver_name = "dcp-cbc-aes", + .cra_alignmask = 3, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_KEYSIZE_128, + .cra_type = &crypto_ablkcipher_type, + .cra_priority = 300, + .cra_u.ablkcipher = { + .min_keysize = AES_KEYSIZE_128, + .max_keysize = AES_KEYSIZE_128, + .setkey = dcp_aes_setkey, + .encrypt = dcp_aes_cbc_encrypt, + .decrypt = dcp_aes_cbc_decrypt, + .ivsize = AES_KEYSIZE_128, + } + + }, +}; + +/* DCP bootstream verification interface: uses OTP key for crypto */ +static int dcp_bootstream_open(struct inode *inode, struct file *file) +{ + file->private_data = container_of((file->private_data), + struct dcp_dev, dcp_bootstream_misc); + return 0; +} + +static long dcp_bootstream_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct dcp_dev *dev = (struct dcp_dev *) file->private_data; + void __user *argp = (void __user *)arg; + int ret; + + if (dev == NULL) + return -EBADF; + + if (cmd != DBS_ENC && cmd != DBS_DEC) + return -EINVAL; + + if (copy_from_user(dev->payload_base, argp, 16)) + return -EFAULT; + + if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags)) + return -EAGAIN; + + dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL); + if (!dev->ctx) { + dev_err(dev->dev, + "cannot allocate context for OTP crypto"); + clear_bit(DCP_FLAG_BUSY, &dev->flags); + return -ENOMEM; + } + + dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT; + dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC; + dev->hw_pkg[0]->src = dev->payload_base_dma; + dev->hw_pkg[0]->dst = dev->payload_base_dma; + dev->hw_pkg[0]->size = 16; + + dcp_op_start(dev, 0); + + while (test_bit(DCP_FLAG_BUSY, &dev->flags)) + cpu_relax(); + + ret = dev->ctx->stat; + if (!ret && copy_to_user(argp, dev->payload_base, 16)) + ret = -EFAULT; + + kfree(dev->ctx); + + return ret; +} + +static const struct file_operations dcp_bootstream_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = dcp_bootstream_ioctl, + .open = dcp_bootstream_open, +}; + +static int dcp_probe(struct platform_device *pdev) +{ + struct dcp_dev *dev = NULL; + struct resource *r; + int i, ret, j; + + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + global_dev = dev; + dev->dev = &pdev->dev; + + platform_set_drvdata(pdev, dev); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "failed to get IORESOURCE_MEM\n"); + return -ENXIO; + } + dev->dcp_regs_base = devm_ioremap(&pdev->dev, r->start, + resource_size(r)); + + dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL); + udelay(10); + dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL); + + dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE | + DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1, + DCP_REG_CTRL); + + dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL); + + for (i = 0; i < 4; i++) + dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i)); + + dcp_clear(dev, -1, DCP_REG_STAT); + + + r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!r) { + dev_err(&pdev->dev, "can't get IRQ resource (0)\n"); + return -EIO; + } + dev->dcp_vmi_irq = r->start; + ret = request_irq(dev->dcp_vmi_irq, dcp_vmi_irq, 0, "dcp", dev); + if (ret != 0) { + dev_err(&pdev->dev, "can't request_irq (0)\n"); + return -EIO; + } + + r = platform_get_resource(pdev, IORESOURCE_IRQ, 1); + if (!r) { + dev_err(&pdev->dev, "can't get IRQ resource (1)\n"); + ret = -EIO; + goto err_free_irq0; + } + dev->dcp_irq = r->start; + ret = request_irq(dev->dcp_irq, dcp_irq, 0, "dcp", dev); + if (ret != 0) { + dev_err(&pdev->dev, "can't request_irq (1)\n"); + ret = -EIO; + goto err_free_irq0; + } + + dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev, + DCP_MAX_PKG * sizeof(struct dcp_hw_packet), + &dev->hw_phys_pkg, + GFP_KERNEL); + if (!dev->hw_pkg[0]) { + dev_err(&pdev->dev, "Could not allocate hw descriptors\n"); + ret = -ENOMEM; + goto err_free_irq1; + } + + for (i = 1; i < DCP_MAX_PKG; i++) { + dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg + + i * sizeof(struct dcp_hw_packet); + dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1; + } + dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg; + + + dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, + &dev->payload_base_dma, GFP_KERNEL); + if (!dev->payload_base) { + dev_err(&pdev->dev, "Could not allocate memory for key\n"); + ret = -ENOMEM; + goto err_free_hw_packet; + } + tasklet_init(&dev->queue_task, dcp_queue_task, + (unsigned long) dev); + tasklet_init(&dev->done_task, dcp_done_task, + (unsigned long) dev); + spin_lock_init(&dev->queue_lock); + + crypto_init_queue(&dev->queue, 10); + + init_timer(&dev->watchdog); + dev->watchdog.function = &dcp_watchdog; + dev->watchdog.data = (unsigned long)dev; + + dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR, + dev->dcp_bootstream_misc.name = "dcpboot", + dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops, + ret = misc_register(&dev->dcp_bootstream_misc); + if (ret != 0) { + dev_err(dev->dev, "Unable to register misc device\n"); + goto err_free_key_iv; + } + + for (i = 0; i < ARRAY_SIZE(algs); i++) { + algs[i].cra_priority = 300; + algs[i].cra_ctxsize = sizeof(struct dcp_op); + algs[i].cra_module = THIS_MODULE; + algs[i].cra_init = dcp_cra_init; + algs[i].cra_exit = dcp_cra_exit; + if (crypto_register_alg(&algs[i])) { + dev_err(&pdev->dev, "register algorithm failed\n"); + ret = -ENOMEM; + goto err_unregister; + } + } + dev_notice(&pdev->dev, "DCP crypto enabled.!\n"); + + return 0; + +err_unregister: + for (j = 0; j < i; j++) + crypto_unregister_alg(&algs[j]); +err_free_key_iv: + dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base, + dev->payload_base_dma); +err_free_hw_packet: + dma_free_coherent(&pdev->dev, DCP_MAX_PKG * + sizeof(struct dcp_hw_packet), dev->hw_pkg[0], + dev->hw_phys_pkg); +err_free_irq1: + free_irq(dev->dcp_irq, dev); +err_free_irq0: + free_irq(dev->dcp_vmi_irq, dev); + + return ret; +} + +static int dcp_remove(struct platform_device *pdev) +{ + struct dcp_dev *dev; + int j; + dev = platform_get_drvdata(pdev); + + dma_free_coherent(&pdev->dev, + DCP_MAX_PKG * sizeof(struct dcp_hw_packet), + dev->hw_pkg[0], dev->hw_phys_pkg); + + dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base, + dev->payload_base_dma); + + free_irq(dev->dcp_irq, dev); + free_irq(dev->dcp_vmi_irq, dev); + + tasklet_kill(&dev->done_task); + tasklet_kill(&dev->queue_task); + + for (j = 0; j < ARRAY_SIZE(algs); j++) + crypto_unregister_alg(&algs[j]); + + misc_deregister(&dev->dcp_bootstream_misc); + + return 0; +} + +static struct of_device_id fs_dcp_of_match[] = { + { .compatible = "fsl-dcp"}, + {}, +}; + +static struct platform_driver fs_dcp_driver = { + .probe = dcp_probe, + .remove = dcp_remove, + .driver = { + .name = "fsl-dcp", + .owner = THIS_MODULE, + .of_match_table = fs_dcp_of_match + } +}; + +module_platform_driver(fs_dcp_driver); + + +MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>"); +MODULE_DESCRIPTION("Freescale DCP Crypto Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index ebf130e894b5..12fea3e22348 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -2676,7 +2676,7 @@ err_out_stop_device: hifn_reset_dma(dev, 1); hifn_stop_device(dev); err_out_free_irq: - free_irq(dev->irq, dev->name); + free_irq(dev->irq, dev); tasklet_kill(&dev->tasklet); err_out_free_desc: pci_free_consistent(pdev, sizeof(struct hifn_dma), @@ -2711,7 +2711,7 @@ static void hifn_remove(struct pci_dev *pdev) hifn_reset_dma(dev, 1); hifn_stop_device(dev); - free_irq(dev->irq, dev->name); + free_irq(dev->irq, dev); tasklet_kill(&dev->tasklet); hifn_flush(dev); diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index ce6290e5471a..3374a3ebe4c7 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1146,7 +1146,6 @@ err_unmap_reg: err: kfree(cp); cpg = NULL; - platform_set_drvdata(pdev, NULL); return ret; } diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index ee15b0f7849a..5f7980586850 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -203,13 +203,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset, static int omap_aes_hw_init(struct omap_aes_dev *dd) { - /* - * clocks are enabled when request starts and disabled when finished. - * It may be long delays between requests. - * Device might go to off mode to save power. - */ - pm_runtime_get_sync(dd->dev); - if (!(dd->flags & FLAGS_INIT)) { dd->flags |= FLAGS_INIT; dd->err = 0; @@ -636,7 +629,6 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) pr_debug("err: %d\n", err); - pm_runtime_put(dd->dev); dd->flags &= ~FLAGS_BUSY; req->base.complete(&req->base, err); @@ -837,8 +829,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req) static int omap_aes_cra_init(struct crypto_tfm *tfm) { - pr_debug("enter\n"); + struct omap_aes_dev *dd = NULL; + + /* Find AES device, currently picks the first device */ + spin_lock_bh(&list_lock); + list_for_each_entry(dd, &dev_list, list) { + break; + } + spin_unlock_bh(&list_lock); + pm_runtime_get_sync(dd->dev); tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx); return 0; @@ -846,7 +846,16 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm) static void omap_aes_cra_exit(struct crypto_tfm *tfm) { - pr_debug("enter\n"); + struct omap_aes_dev *dd = NULL; + + /* Find AES device, currently picks the first device */ + spin_lock_bh(&list_lock); + list_for_each_entry(dd, &dev_list, list) { + break; + } + spin_unlock_bh(&list_lock); + + pm_runtime_put_sync(dd->dev); } /* ********************** ALGS ************************************ */ @@ -1125,10 +1134,9 @@ static int omap_aes_probe(struct platform_device *pdev) if (err) goto err_res; - dd->io_base = devm_request_and_ioremap(dev, &res); - if (!dd->io_base) { - dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; + dd->io_base = devm_ioremap_resource(dev, &res); + if (IS_ERR(dd->io_base)) { + err = PTR_ERR(dd->io_base); goto err_res; } dd->phys_base = res.start; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a1e1b4756ee5..4bb67652c200 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1686,10 +1686,9 @@ static int omap_sham_probe(struct platform_device *pdev) if (err) goto res_err; - dd->io_base = devm_request_and_ioremap(dev, &res); - if (!dd->io_base) { - dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; + dd->io_base = devm_ioremap_resource(dev, &res); + if (IS_ERR(dd->io_base)) { + err = PTR_ERR(dd->io_base); goto res_err; } dd->phys_base = res.start; diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index ac30724d923d..888f7f4a6d3f 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1298,7 +1298,7 @@ static ssize_t spacc_stat_irq_thresh_store(struct device *dev, struct spacc_engine *engine = spacc_dev_to_engine(dev); unsigned long thresh; - if (strict_strtoul(buf, 0, &thresh)) + if (kstrtoul(buf, 0, &thresh)) return -EINVAL; thresh = clamp(thresh, 1UL, engine->fifo_sz - 1); diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 4b314326f48a..cf149b19ff47 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -647,7 +647,6 @@ static int s5p_aes_probe(struct platform_device *pdev) clk_disable(pdata->clk); s5p_dev = NULL; - platform_set_drvdata(pdev, NULL); return err; } @@ -668,7 +667,6 @@ static int s5p_aes_remove(struct platform_device *pdev) clk_disable(pdata->clk); s5p_dev = NULL; - platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 83d79b964d12..a999f537228f 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1629,7 +1629,7 @@ static int ux500_cryp_remove(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res) - release_mem_region(res->start, res->end - res->start + 1); + release_mem_region(res->start, resource_size(res)); kfree(device_data); diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3215a3cb3de8..6825957c97fb 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -79,25 +79,7 @@ config INTEL_IOP_ADMA help Enable support for the Intel(R) IOP Series RAID engines. -config DW_DMAC - tristate "Synopsys DesignWare AHB DMA support" - depends on GENERIC_HARDIRQS - select DMA_ENGINE - default y if CPU_AT32AP7000 - help - Support the Synopsys DesignWare AHB DMA controller. This - can be integrated in chips such as the Atmel AT32ap7000. - -config DW_DMAC_BIG_ENDIAN_IO - bool "Use big endian I/O register access" - default y if AVR32 - depends on DW_DMAC - help - Say yes here to use big endian I/O access when reading and writing - to the DMA controller registers. This is needed on some platforms, - like the Atmel AVR32 architecture. - - If unsure, use the default setting. +source "drivers/dma/dw/Kconfig" config AT_HDMAC tristate "Atmel AHB DMA support" @@ -312,6 +294,12 @@ config MMP_PDMA help Support the MMP PDMA engine for PXA and MMP platfrom. +config DMA_JZ4740 + tristate "JZ4740 DMA support" + depends on MACH_JZ4740 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index a2b0df591f95..5e0f2ef85614 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ obj-$(CONFIG_MV_XOR) += mv_xor.o -obj-$(CONFIG_DW_DMAC) += dw_dmac.o +obj-$(CONFIG_DW_DMAC_CORE) += dw/ obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_MX3_IPU) += ipu/ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o @@ -38,3 +38,4 @@ obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o +obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 8bad254a498d..06fe45c74de5 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -299,8 +299,8 @@ static int pl08x_request_mux(struct pl08x_dma_chan *plchan) const struct pl08x_platform_data *pd = plchan->host->pd; int ret; - if (plchan->mux_use++ == 0 && pd->get_signal) { - ret = pd->get_signal(plchan->cd); + if (plchan->mux_use++ == 0 && pd->get_xfer_signal) { + ret = pd->get_xfer_signal(plchan->cd); if (ret < 0) { plchan->mux_use = 0; return ret; @@ -318,8 +318,8 @@ static void pl08x_release_mux(struct pl08x_dma_chan *plchan) if (plchan->signal >= 0) { WARN_ON(plchan->mux_use == 0); - if (--plchan->mux_use == 0 && pd->put_signal) { - pd->put_signal(plchan->cd, plchan->signal); + if (--plchan->mux_use == 0 && pd->put_xfer_signal) { + pd->put_xfer_signal(plchan->cd, plchan->signal); plchan->signal = -1; } } diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e923cda930f9..c787f38a186a 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -14,6 +14,7 @@ * found on AT91SAM9263. */ +#include <dt-bindings/dma/at91.h> #include <linux/clk.h> #include <linux/dmaengine.h> #include <linux/dma-mapping.h> @@ -54,6 +55,7 @@ MODULE_PARM_DESC(init_nr_desc_per_channel, /* prototypes */ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); +static void atc_issue_pending(struct dma_chan *chan); /*----------------------------------------------------------------------*/ @@ -230,6 +232,95 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) vdbg_dump_regs(atchan); } +/* + * atc_get_current_descriptors - + * locate the descriptor which equal to physical address in DSCR + * @atchan: the channel we want to start + * @dscr_addr: physical descriptor address in DSCR + */ +static struct at_desc *atc_get_current_descriptors(struct at_dma_chan *atchan, + u32 dscr_addr) +{ + struct at_desc *desc, *_desc, *child, *desc_cur = NULL; + + list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { + if (desc->lli.dscr == dscr_addr) { + desc_cur = desc; + break; + } + + list_for_each_entry(child, &desc->tx_list, desc_node) { + if (child->lli.dscr == dscr_addr) { + desc_cur = child; + break; + } + } + } + + return desc_cur; +} + +/* + * atc_get_bytes_left - + * Get the number of bytes residue in dma buffer, + * @chan: the channel we want to start + */ +static int atc_get_bytes_left(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + int chan_id = atchan->chan_common.chan_id; + struct at_desc *desc_first = atc_first_active(atchan); + struct at_desc *desc_cur; + int ret = 0, count = 0; + + /* + * Initialize necessary values in the first time. + * remain_desc record remain desc length. + */ + if (atchan->remain_desc == 0) + /* First descriptor embedds the transaction length */ + atchan->remain_desc = desc_first->len; + + /* + * This happens when current descriptor transfer complete. + * The residual buffer size should reduce current descriptor length. + */ + if (unlikely(test_bit(ATC_IS_BTC, &atchan->status))) { + clear_bit(ATC_IS_BTC, &atchan->status); + desc_cur = atc_get_current_descriptors(atchan, + channel_readl(atchan, DSCR)); + if (!desc_cur) { + ret = -EINVAL; + goto out; + } + atchan->remain_desc -= (desc_cur->lli.ctrla & ATC_BTSIZE_MAX) + << (desc_first->tx_width); + if (atchan->remain_desc < 0) { + ret = -EINVAL; + goto out; + } else { + ret = atchan->remain_desc; + } + } else { + /* + * Get residual bytes when current + * descriptor transfer in progress. + */ + count = (channel_readl(atchan, CTRLA) & ATC_BTSIZE_MAX) + << (desc_first->tx_width); + ret = atchan->remain_desc - count; + } + /* + * Check fifo empty. + */ + if (!(dma_readl(atdma, CHSR) & AT_DMA_EMPT(chan_id))) + atc_issue_pending(chan); + +out: + return ret; +} + /** * atc_chain_complete - finish work for one transaction chain * @atchan: channel we work on @@ -327,37 +418,6 @@ static void atc_complete_all(struct at_dma_chan *atchan) } /** - * atc_cleanup_descriptors - cleanup up finished descriptors in active_list - * @atchan: channel to be cleaned up - * - * Called with atchan->lock held and bh disabled - */ -static void atc_cleanup_descriptors(struct at_dma_chan *atchan) -{ - struct at_desc *desc, *_desc; - struct at_desc *child; - - dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n"); - - list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { - if (!(desc->lli.ctrla & ATC_DONE)) - /* This one is currently in progress */ - return; - - list_for_each_entry(child, &desc->tx_list, desc_node) - if (!(child->lli.ctrla & ATC_DONE)) - /* Currently in progress */ - return; - - /* - * No descriptors so far seem to be in progress, i.e. - * this chain must be done. - */ - atc_chain_complete(atchan, desc); - } -} - -/** * atc_advance_work - at the end of a transaction, move forward * @atchan: channel where the transaction ended * @@ -496,6 +556,8 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) /* Give information to tasklet */ set_bit(ATC_IS_ERROR, &atchan->status); } + if (pending & AT_DMA_BTC(i)) + set_bit(ATC_IS_BTC, &atchan->status); tasklet_schedule(&atchan->tasklet); ret = IRQ_HANDLED; } @@ -615,6 +677,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->len = len; + first->tx_width = src_width; /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -761,6 +824,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->len = total_len; + first->tx_width = reg_width; /* first link descriptor of list is responsible of flags */ first->txd.flags = flags; /* client is in control of this ack */ @@ -919,6 +983,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->len = buf_len; + first->tx_width = reg_width; return &first->txd; @@ -1032,34 +1097,36 @@ atc_tx_status(struct dma_chan *chan, struct dma_tx_state *txstate) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - dma_cookie_t last_used; - dma_cookie_t last_complete; unsigned long flags; enum dma_status ret; - - spin_lock_irqsave(&atchan->lock, flags); + int bytes = 0; ret = dma_cookie_status(chan, cookie, txstate); - if (ret != DMA_SUCCESS) { - atc_cleanup_descriptors(atchan); + if (ret == DMA_SUCCESS) + return ret; + /* + * There's no point calculating the residue if there's + * no txstate to store the value. + */ + if (!txstate) + return DMA_ERROR; - ret = dma_cookie_status(chan, cookie, txstate); - } + spin_lock_irqsave(&atchan->lock, flags); - last_complete = chan->completed_cookie; - last_used = chan->cookie; + /* Get number of bytes left in the active transactions */ + bytes = atc_get_bytes_left(chan); spin_unlock_irqrestore(&atchan->lock, flags); - if (ret != DMA_SUCCESS) - dma_set_residue(txstate, atc_first_active(atchan)->len); - - if (atc_chan_is_paused(atchan)) - ret = DMA_PAUSED; + if (unlikely(bytes < 0)) { + dev_vdbg(chan2dev(chan), "get residual bytes error\n"); + return DMA_ERROR; + } else { + dma_set_residue(txstate, bytes); + } - dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d (d%d, u%d)\n", - ret, cookie, last_complete ? last_complete : 0, - last_used ? last_used : 0); + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %d\n", + ret, cookie, bytes); return ret; } @@ -1120,7 +1187,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) */ BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); - /* if cfg configuration specified take it instad of default */ + /* if cfg configuration specified take it instead of default */ if (atslave->cfg) cfg = atslave->cfg; } @@ -1143,6 +1210,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&atchan->lock, flags); atchan->descs_allocated = i; + atchan->remain_desc = 0; list_splice(&tmp_list, &atchan->free_list); dma_cookie_init(chan); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1185,6 +1253,7 @@ static void atc_free_chan_resources(struct dma_chan *chan) list_splice_init(&atchan->free_list, &list); atchan->descs_allocated = 0; atchan->status = 0; + atchan->remain_desc = 0; dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } @@ -1223,14 +1292,31 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL); if (!atslave) return NULL; + + atslave->cfg = ATC_DST_H2SEL_HW | ATC_SRC_H2SEL_HW; /* * We can fill both SRC_PER and DST_PER, one of these fields will be * ignored depending on DMA transfer direction. */ - per_id = dma_spec->args[1]; - atslave->cfg = ATC_FIFOCFG_HALFFIFO | ATC_DST_H2SEL_HW - | ATC_SRC_H2SEL_HW | ATC_DST_PER(per_id) - | ATC_SRC_PER(per_id); + per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK; + atslave->cfg |= ATC_DST_PER_MSB(per_id) | ATC_DST_PER(per_id) + | ATC_SRC_PER_MSB(per_id) | ATC_SRC_PER(per_id); + /* + * We have to translate the value we get from the device tree since + * the half FIFO configuration value had to be 0 to keep backward + * compatibility. + */ + switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) { + case AT91_DMA_CFG_FIFOCFG_ALAP: + atslave->cfg |= ATC_FIFOCFG_LARGESTBURST; + break; + case AT91_DMA_CFG_FIFOCFG_ASAP: + atslave->cfg |= ATC_FIFOCFG_ENOUGHSPACE; + break; + case AT91_DMA_CFG_FIFOCFG_HALF: + default: + atslave->cfg |= ATC_FIFOCFG_HALFFIFO; + } atslave->dma_dev = &dmac_pdev->dev; chan = dma_request_channel(mask, at_dma_filter, atslave); @@ -1374,7 +1460,9 @@ static int __init at_dma_probe(struct platform_device *pdev) err = PTR_ERR(atdma->clk); goto err_clk; } - clk_enable(atdma->clk); + err = clk_prepare_enable(atdma->clk); + if (err) + goto err_clk_prepare; /* force dma off, just in case */ at_dma_off(atdma); @@ -1472,10 +1560,10 @@ err_of_dma_controller_register: dma_async_device_unregister(&atdma->dma_common); dma_pool_destroy(atdma->dma_desc_pool); err_pool_create: - platform_set_drvdata(pdev, NULL); free_irq(platform_get_irq(pdev, 0), atdma); err_irq: - clk_disable(atdma->clk); + clk_disable_unprepare(atdma->clk); +err_clk_prepare: clk_put(atdma->clk); err_clk: iounmap(atdma->regs); @@ -1497,7 +1585,6 @@ static int at_dma_remove(struct platform_device *pdev) dma_async_device_unregister(&atdma->dma_common); dma_pool_destroy(atdma->dma_desc_pool); - platform_set_drvdata(pdev, NULL); free_irq(platform_get_irq(pdev, 0), atdma); list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, @@ -1512,7 +1599,7 @@ static int at_dma_remove(struct platform_device *pdev) list_del(&chan->device_node); } - clk_disable(atdma->clk); + clk_disable_unprepare(atdma->clk); clk_put(atdma->clk); iounmap(atdma->regs); @@ -1531,7 +1618,7 @@ static void at_dma_shutdown(struct platform_device *pdev) struct at_dma *atdma = platform_get_drvdata(pdev); at_dma_off(platform_get_drvdata(pdev)); - clk_disable(atdma->clk); + clk_disable_unprepare(atdma->clk); } static int at_dma_prepare(struct device *dev) @@ -1588,7 +1675,7 @@ static int at_dma_suspend_noirq(struct device *dev) /* disable DMA controller */ at_dma_off(atdma); - clk_disable(atdma->clk); + clk_disable_unprepare(atdma->clk); return 0; } @@ -1618,7 +1705,7 @@ static int at_dma_resume_noirq(struct device *dev) struct dma_chan *chan, *_chan; /* bring back DMA controller */ - clk_enable(atdma->clk); + clk_prepare_enable(atdma->clk); dma_writel(atdma, EN, AT_DMA_ENABLE); /* clear any pending interrupt */ diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index c604d26fd4d3..f31d647acdfa 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -182,6 +182,7 @@ struct at_lli { * @txd: support for the async_tx api * @desc_node: node on the channed descriptors list * @len: total transaction bytecount + * @tx_width: transfer width */ struct at_desc { /* FIRST values the hardware uses */ @@ -192,6 +193,7 @@ struct at_desc { struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; + u32 tx_width; }; static inline struct at_desc * @@ -211,6 +213,7 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) enum atc_status { ATC_IS_ERROR = 0, ATC_IS_PAUSED = 1, + ATC_IS_BTC = 2, ATC_IS_CYCLIC = 24, }; @@ -228,6 +231,7 @@ enum atc_status { * @save_cfg: configuration register that is saved on suspend/resume cycle * @save_dscr: for cyclic operations, preserve next descriptor address in * the cyclic list on suspend/resume cycle + * @remain_desc: to save remain desc length * @dma_sconfig: configuration for slave transfers, passed via DMA_SLAVE_CONFIG * @lock: serializes enqueue/dequeue operations to descriptors lists * @active_list: list of descriptors dmaengine is being running on @@ -246,6 +250,7 @@ struct at_dma_chan { struct tasklet_struct tasklet; u32 save_cfg; u32 save_dscr; + u32 remain_desc; struct dma_slave_config dma_sconfig; spinlock_t lock; diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c new file mode 100644 index 000000000000..b0c0c8268d42 --- /dev/null +++ b/drivers/dma/dma-jz4740.c @@ -0,0 +1,617 @@ +/* + * Copyright (C) 2013, Lars-Peter Clausen <lars@metafoo.de> + * JZ4740 DMAC support + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/irq.h> +#include <linux/clk.h> + +#include <asm/mach-jz4740/dma.h> + +#include "virt-dma.h" + +#define JZ_DMA_NR_CHANS 6 + +#define JZ_REG_DMA_SRC_ADDR(x) (0x00 + (x) * 0x20) +#define JZ_REG_DMA_DST_ADDR(x) (0x04 + (x) * 0x20) +#define JZ_REG_DMA_TRANSFER_COUNT(x) (0x08 + (x) * 0x20) +#define JZ_REG_DMA_REQ_TYPE(x) (0x0C + (x) * 0x20) +#define JZ_REG_DMA_STATUS_CTRL(x) (0x10 + (x) * 0x20) +#define JZ_REG_DMA_CMD(x) (0x14 + (x) * 0x20) +#define JZ_REG_DMA_DESC_ADDR(x) (0x18 + (x) * 0x20) + +#define JZ_REG_DMA_CTRL 0x300 +#define JZ_REG_DMA_IRQ 0x304 +#define JZ_REG_DMA_DOORBELL 0x308 +#define JZ_REG_DMA_DOORBELL_SET 0x30C + +#define JZ_DMA_STATUS_CTRL_NO_DESC BIT(31) +#define JZ_DMA_STATUS_CTRL_DESC_INV BIT(6) +#define JZ_DMA_STATUS_CTRL_ADDR_ERR BIT(4) +#define JZ_DMA_STATUS_CTRL_TRANSFER_DONE BIT(3) +#define JZ_DMA_STATUS_CTRL_HALT BIT(2) +#define JZ_DMA_STATUS_CTRL_COUNT_TERMINATE BIT(1) +#define JZ_DMA_STATUS_CTRL_ENABLE BIT(0) + +#define JZ_DMA_CMD_SRC_INC BIT(23) +#define JZ_DMA_CMD_DST_INC BIT(22) +#define JZ_DMA_CMD_RDIL_MASK (0xf << 16) +#define JZ_DMA_CMD_SRC_WIDTH_MASK (0x3 << 14) +#define JZ_DMA_CMD_DST_WIDTH_MASK (0x3 << 12) +#define JZ_DMA_CMD_INTERVAL_LENGTH_MASK (0x7 << 8) +#define JZ_DMA_CMD_BLOCK_MODE BIT(7) +#define JZ_DMA_CMD_DESC_VALID BIT(4) +#define JZ_DMA_CMD_DESC_VALID_MODE BIT(3) +#define JZ_DMA_CMD_VALID_IRQ_ENABLE BIT(2) +#define JZ_DMA_CMD_TRANSFER_IRQ_ENABLE BIT(1) +#define JZ_DMA_CMD_LINK_ENABLE BIT(0) + +#define JZ_DMA_CMD_FLAGS_OFFSET 22 +#define JZ_DMA_CMD_RDIL_OFFSET 16 +#define JZ_DMA_CMD_SRC_WIDTH_OFFSET 14 +#define JZ_DMA_CMD_DST_WIDTH_OFFSET 12 +#define JZ_DMA_CMD_TRANSFER_SIZE_OFFSET 8 +#define JZ_DMA_CMD_MODE_OFFSET 7 + +#define JZ_DMA_CTRL_PRIORITY_MASK (0x3 << 8) +#define JZ_DMA_CTRL_HALT BIT(3) +#define JZ_DMA_CTRL_ADDRESS_ERROR BIT(2) +#define JZ_DMA_CTRL_ENABLE BIT(0) + +enum jz4740_dma_width { + JZ4740_DMA_WIDTH_32BIT = 0, + JZ4740_DMA_WIDTH_8BIT = 1, + JZ4740_DMA_WIDTH_16BIT = 2, +}; + +enum jz4740_dma_transfer_size { + JZ4740_DMA_TRANSFER_SIZE_4BYTE = 0, + JZ4740_DMA_TRANSFER_SIZE_1BYTE = 1, + JZ4740_DMA_TRANSFER_SIZE_2BYTE = 2, + JZ4740_DMA_TRANSFER_SIZE_16BYTE = 3, + JZ4740_DMA_TRANSFER_SIZE_32BYTE = 4, +}; + +enum jz4740_dma_flags { + JZ4740_DMA_SRC_AUTOINC = 0x2, + JZ4740_DMA_DST_AUTOINC = 0x1, +}; + +enum jz4740_dma_mode { + JZ4740_DMA_MODE_SINGLE = 0, + JZ4740_DMA_MODE_BLOCK = 1, +}; + +struct jz4740_dma_sg { + dma_addr_t addr; + unsigned int len; +}; + +struct jz4740_dma_desc { + struct virt_dma_desc vdesc; + + enum dma_transfer_direction direction; + bool cyclic; + + unsigned int num_sgs; + struct jz4740_dma_sg sg[]; +}; + +struct jz4740_dmaengine_chan { + struct virt_dma_chan vchan; + unsigned int id; + + dma_addr_t fifo_addr; + unsigned int transfer_shift; + + struct jz4740_dma_desc *desc; + unsigned int next_sg; +}; + +struct jz4740_dma_dev { + struct dma_device ddev; + void __iomem *base; + struct clk *clk; + + struct jz4740_dmaengine_chan chan[JZ_DMA_NR_CHANS]; +}; + +static struct jz4740_dma_dev *jz4740_dma_chan_get_dev( + struct jz4740_dmaengine_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct jz4740_dma_dev, + ddev); +} + +static struct jz4740_dmaengine_chan *to_jz4740_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct jz4740_dmaengine_chan, vchan.chan); +} + +static struct jz4740_dma_desc *to_jz4740_dma_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct jz4740_dma_desc, vdesc); +} + +static inline uint32_t jz4740_dma_read(struct jz4740_dma_dev *dmadev, + unsigned int reg) +{ + return readl(dmadev->base + reg); +} + +static inline void jz4740_dma_write(struct jz4740_dma_dev *dmadev, + unsigned reg, uint32_t val) +{ + writel(val, dmadev->base + reg); +} + +static inline void jz4740_dma_write_mask(struct jz4740_dma_dev *dmadev, + unsigned int reg, uint32_t val, uint32_t mask) +{ + uint32_t tmp; + + tmp = jz4740_dma_read(dmadev, reg); + tmp &= ~mask; + tmp |= val; + jz4740_dma_write(dmadev, reg, tmp); +} + +static struct jz4740_dma_desc *jz4740_dma_alloc_desc(unsigned int num_sgs) +{ + return kzalloc(sizeof(struct jz4740_dma_desc) + + sizeof(struct jz4740_dma_sg) * num_sgs, GFP_ATOMIC); +} + +static enum jz4740_dma_width jz4740_dma_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return JZ4740_DMA_WIDTH_8BIT; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return JZ4740_DMA_WIDTH_16BIT; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return JZ4740_DMA_WIDTH_32BIT; + default: + return JZ4740_DMA_WIDTH_32BIT; + } +} + +static enum jz4740_dma_transfer_size jz4740_dma_maxburst(u32 maxburst) +{ + if (maxburst <= 1) + return JZ4740_DMA_TRANSFER_SIZE_1BYTE; + else if (maxburst <= 3) + return JZ4740_DMA_TRANSFER_SIZE_2BYTE; + else if (maxburst <= 15) + return JZ4740_DMA_TRANSFER_SIZE_4BYTE; + else if (maxburst <= 31) + return JZ4740_DMA_TRANSFER_SIZE_16BYTE; + + return JZ4740_DMA_TRANSFER_SIZE_32BYTE; +} + +static int jz4740_dma_slave_config(struct dma_chan *c, + const struct dma_slave_config *config) +{ + struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); + struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan); + enum jz4740_dma_width src_width; + enum jz4740_dma_width dst_width; + enum jz4740_dma_transfer_size transfer_size; + enum jz4740_dma_flags flags; + uint32_t cmd; + + switch (config->direction) { + case DMA_MEM_TO_DEV: + flags = JZ4740_DMA_SRC_AUTOINC; + transfer_size = jz4740_dma_maxburst(config->dst_maxburst); + chan->fifo_addr = config->dst_addr; + break; + case DMA_DEV_TO_MEM: + flags = JZ4740_DMA_DST_AUTOINC; + transfer_size = jz4740_dma_maxburst(config->src_maxburst); + chan->fifo_addr = config->src_addr; + break; + default: + return -EINVAL; + } + + src_width = jz4740_dma_width(config->src_addr_width); + dst_width = jz4740_dma_width(config->dst_addr_width); + + switch (transfer_size) { + case JZ4740_DMA_TRANSFER_SIZE_2BYTE: + chan->transfer_shift = 1; + break; + case JZ4740_DMA_TRANSFER_SIZE_4BYTE: + chan->transfer_shift = 2; + break; + case JZ4740_DMA_TRANSFER_SIZE_16BYTE: + chan->transfer_shift = 4; + break; + case JZ4740_DMA_TRANSFER_SIZE_32BYTE: + chan->transfer_shift = 5; + break; + default: + chan->transfer_shift = 0; + break; + } + + cmd = flags << JZ_DMA_CMD_FLAGS_OFFSET; + cmd |= src_width << JZ_DMA_CMD_SRC_WIDTH_OFFSET; + cmd |= dst_width << JZ_DMA_CMD_DST_WIDTH_OFFSET; + cmd |= transfer_size << JZ_DMA_CMD_TRANSFER_SIZE_OFFSET; + cmd |= JZ4740_DMA_MODE_SINGLE << JZ_DMA_CMD_MODE_OFFSET; + cmd |= JZ_DMA_CMD_TRANSFER_IRQ_ENABLE; + + jz4740_dma_write(dmadev, JZ_REG_DMA_CMD(chan->id), cmd); + jz4740_dma_write(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0); + jz4740_dma_write(dmadev, JZ_REG_DMA_REQ_TYPE(chan->id), + config->slave_id); + + return 0; +} + +static int jz4740_dma_terminate_all(struct dma_chan *c) +{ + struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); + struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0, + JZ_DMA_STATUS_CTRL_ENABLE); + chan->desc = NULL; + vchan_get_all_descriptors(&chan->vchan, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static int jz4740_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct dma_slave_config *config = (struct dma_slave_config *)arg; + + switch (cmd) { + case DMA_SLAVE_CONFIG: + return jz4740_dma_slave_config(chan, config); + case DMA_TERMINATE_ALL: + return jz4740_dma_terminate_all(chan); + default: + return -ENOSYS; + } +} + +static int jz4740_dma_start_transfer(struct jz4740_dmaengine_chan *chan) +{ + struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan); + dma_addr_t src_addr, dst_addr; + struct virt_dma_desc *vdesc; + struct jz4740_dma_sg *sg; + + jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0, + JZ_DMA_STATUS_CTRL_ENABLE); + + if (!chan->desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return 0; + chan->desc = to_jz4740_dma_desc(vdesc); + chan->next_sg = 0; + } + + if (chan->next_sg == chan->desc->num_sgs) + chan->next_sg = 0; + + sg = &chan->desc->sg[chan->next_sg]; + + if (chan->desc->direction == DMA_MEM_TO_DEV) { + src_addr = sg->addr; + dst_addr = chan->fifo_addr; + } else { + src_addr = chan->fifo_addr; + dst_addr = sg->addr; + } + jz4740_dma_write(dmadev, JZ_REG_DMA_SRC_ADDR(chan->id), src_addr); + jz4740_dma_write(dmadev, JZ_REG_DMA_DST_ADDR(chan->id), dst_addr); + jz4740_dma_write(dmadev, JZ_REG_DMA_TRANSFER_COUNT(chan->id), + sg->len >> chan->transfer_shift); + + chan->next_sg++; + + jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), + JZ_DMA_STATUS_CTRL_NO_DESC | JZ_DMA_STATUS_CTRL_ENABLE, + JZ_DMA_STATUS_CTRL_HALT | JZ_DMA_STATUS_CTRL_NO_DESC | + JZ_DMA_STATUS_CTRL_ENABLE); + + jz4740_dma_write_mask(dmadev, JZ_REG_DMA_CTRL, + JZ_DMA_CTRL_ENABLE, + JZ_DMA_CTRL_HALT | JZ_DMA_CTRL_ENABLE); + + return 0; +} + +static void jz4740_dma_chan_irq(struct jz4740_dmaengine_chan *chan) +{ + spin_lock(&chan->vchan.lock); + if (chan->desc) { + if (chan->desc && chan->desc->cyclic) { + vchan_cyclic_callback(&chan->desc->vdesc); + } else { + if (chan->next_sg == chan->desc->num_sgs) { + chan->desc = NULL; + vchan_cookie_complete(&chan->desc->vdesc); + } + } + } + jz4740_dma_start_transfer(chan); + spin_unlock(&chan->vchan.lock); +} + +static irqreturn_t jz4740_dma_irq(int irq, void *devid) +{ + struct jz4740_dma_dev *dmadev = devid; + uint32_t irq_status; + unsigned int i; + + irq_status = readl(dmadev->base + JZ_REG_DMA_IRQ); + + for (i = 0; i < 6; ++i) { + if (irq_status & (1 << i)) { + jz4740_dma_write_mask(dmadev, + JZ_REG_DMA_STATUS_CTRL(i), 0, + JZ_DMA_STATUS_CTRL_ENABLE | + JZ_DMA_STATUS_CTRL_TRANSFER_DONE); + + jz4740_dma_chan_irq(&dmadev->chan[i]); + } + } + + return IRQ_HANDLED; +} + +static void jz4740_dma_issue_pending(struct dma_chan *c) +{ + struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); + unsigned long flags; + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan) && !chan->desc) + jz4740_dma_start_transfer(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg( + struct dma_chan *c, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); + struct jz4740_dma_desc *desc; + struct scatterlist *sg; + unsigned int i; + + desc = jz4740_dma_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + desc->sg[i].addr = sg_dma_address(sg); + desc->sg[i].len = sg_dma_len(sg); + } + + desc->num_sgs = sg_len; + desc->direction = direction; + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic( + struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); + struct jz4740_dma_desc *desc; + unsigned int num_periods, i; + + if (buf_len % period_len) + return NULL; + + num_periods = buf_len / period_len; + + desc = jz4740_dma_alloc_desc(num_periods); + if (!desc) + return NULL; + + for (i = 0; i < num_periods; i++) { + desc->sg[i].addr = buf_addr; + desc->sg[i].len = period_len; + buf_addr += period_len; + } + + desc->num_sgs = num_periods; + desc->direction = direction; + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static size_t jz4740_dma_desc_residue(struct jz4740_dmaengine_chan *chan, + struct jz4740_dma_desc *desc, unsigned int next_sg) +{ + struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan); + unsigned int residue, count; + unsigned int i; + + residue = 0; + + for (i = next_sg; i < desc->num_sgs; i++) + residue += desc->sg[i].len; + + if (next_sg != 0) { + count = jz4740_dma_read(dmadev, + JZ_REG_DMA_TRANSFER_COUNT(chan->id)); + residue += count << chan->transfer_shift; + } + + return residue; +} + +static enum dma_status jz4740_dma_tx_status(struct dma_chan *c, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); + struct virt_dma_desc *vdesc; + enum dma_status status; + unsigned long flags; + + status = dma_cookie_status(c, cookie, state); + if (status == DMA_SUCCESS || !state) + return status; + + spin_lock_irqsave(&chan->vchan.lock, flags); + vdesc = vchan_find_desc(&chan->vchan, cookie); + if (cookie == chan->desc->vdesc.tx.cookie) { + state->residue = jz4740_dma_desc_residue(chan, chan->desc, + chan->next_sg); + } else if (vdesc) { + state->residue = jz4740_dma_desc_residue(chan, + to_jz4740_dma_desc(vdesc), 0); + } else { + state->residue = 0; + } + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + return status; +} + +static int jz4740_dma_alloc_chan_resources(struct dma_chan *c) +{ + return 0; +} + +static void jz4740_dma_free_chan_resources(struct dma_chan *c) +{ + vchan_free_chan_resources(to_virt_chan(c)); +} + +static void jz4740_dma_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct jz4740_dma_desc, vdesc)); +} + +static int jz4740_dma_probe(struct platform_device *pdev) +{ + struct jz4740_dmaengine_chan *chan; + struct jz4740_dma_dev *dmadev; + struct dma_device *dd; + unsigned int i; + struct resource *res; + int ret; + int irq; + + dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL); + if (!dmadev) + return -EINVAL; + + dd = &dmadev->ddev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dmadev->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dmadev->base)) + return PTR_ERR(dmadev->base); + + dmadev->clk = clk_get(&pdev->dev, "dma"); + if (IS_ERR(dmadev->clk)) + return PTR_ERR(dmadev->clk); + + clk_prepare_enable(dmadev->clk); + + dma_cap_set(DMA_SLAVE, dd->cap_mask); + dma_cap_set(DMA_CYCLIC, dd->cap_mask); + dd->device_alloc_chan_resources = jz4740_dma_alloc_chan_resources; + dd->device_free_chan_resources = jz4740_dma_free_chan_resources; + dd->device_tx_status = jz4740_dma_tx_status; + dd->device_issue_pending = jz4740_dma_issue_pending; + dd->device_prep_slave_sg = jz4740_dma_prep_slave_sg; + dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic; + dd->device_control = jz4740_dma_control; + dd->dev = &pdev->dev; + dd->chancnt = JZ_DMA_NR_CHANS; + INIT_LIST_HEAD(&dd->channels); + + for (i = 0; i < dd->chancnt; i++) { + chan = &dmadev->chan[i]; + chan->id = i; + chan->vchan.desc_free = jz4740_dma_desc_free; + vchan_init(&chan->vchan, dd); + } + + ret = dma_async_device_register(dd); + if (ret) + return ret; + + irq = platform_get_irq(pdev, 0); + ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev); + if (ret) + goto err_unregister; + + platform_set_drvdata(pdev, dmadev); + + return 0; + +err_unregister: + dma_async_device_unregister(dd); + return ret; +} + +static int jz4740_dma_remove(struct platform_device *pdev) +{ + struct jz4740_dma_dev *dmadev = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + + free_irq(irq, dmadev); + dma_async_device_unregister(&dmadev->ddev); + clk_disable_unprepare(dmadev->clk); + + return 0; +} + +static struct platform_driver jz4740_dma_driver = { + .probe = jz4740_dma_probe, + .remove = jz4740_dma_remove, + .driver = { + .name = "jz4740-dma", + .owner = THIS_MODULE, + }, +}; +module_platform_driver(jz4740_dma_driver); + +MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); +MODULE_DESCRIPTION("JZ4740 DMA driver"); +MODULE_LICENSE("GPLv2"); diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig new file mode 100644 index 000000000000..dde13248b681 --- /dev/null +++ b/drivers/dma/dw/Kconfig @@ -0,0 +1,29 @@ +# +# DMA engine configuration for dw +# + +config DW_DMAC_CORE + tristate "Synopsys DesignWare AHB DMA support" + depends on GENERIC_HARDIRQS + select DMA_ENGINE + +config DW_DMAC + tristate "Synopsys DesignWare AHB DMA platform driver" + select DW_DMAC_CORE + select DW_DMAC_BIG_ENDIAN_IO if AVR32 + default y if CPU_AT32AP7000 + help + Support the Synopsys DesignWare AHB DMA controller. This + can be integrated in chips such as the Atmel AT32ap7000. + +config DW_DMAC_PCI + tristate "Synopsys DesignWare AHB DMA PCI driver" + depends on PCI + select DW_DMAC_CORE + help + Support the Synopsys DesignWare AHB DMA controller on the + platfroms that enumerate it as a PCI device. For example, + Intel Medfield has integrated this GPDMA controller. + +config DW_DMAC_BIG_ENDIAN_IO + bool diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile new file mode 100644 index 000000000000..3eebd1ce2c6b --- /dev/null +++ b/drivers/dma/dw/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o +dw_dmac_core-objs := core.o + +obj-$(CONFIG_DW_DMAC) += dw_dmac.o +dw_dmac-objs := platform.o + +obj-$(CONFIG_DW_DMAC_PCI) += dw_dmac_pci.o +dw_dmac_pci-objs := pci.o diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw/core.c index 2e5deaa82b60..eea479c12173 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw/core.c @@ -3,6 +3,7 @@ * * Copyright (C) 2007-2008 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics + * Copyright (C) 2013 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,17 +20,12 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> -#include <linux/of.h> -#include <linux/of_dma.h> #include <linux/mm.h> #include <linux/module.h> -#include <linux/platform_device.h> #include <linux/slab.h> -#include <linux/acpi.h> -#include <linux/acpi_dma.h> -#include "dw_dmac_regs.h" -#include "dmaengine.h" +#include "../dmaengine.h" +#include "internal.h" /* * This supports the Synopsys "DesignWare AHB Central DMA Controller", @@ -41,16 +37,6 @@ * which does not support descriptor writeback. */ -static inline unsigned int dwc_get_dms(struct dw_dma_slave *slave) -{ - return slave ? slave->dst_master : 0; -} - -static inline unsigned int dwc_get_sms(struct dw_dma_slave *slave) -{ - return slave ? slave->src_master : 1; -} - static inline void dwc_set_masters(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); @@ -556,14 +542,14 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) /* --------------------- Cyclic DMA API extensions -------------------- */ -inline dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan) +dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); return channel_readl(dwc, SAR); } EXPORT_SYMBOL(dw_dma_get_src_addr); -inline dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan) +dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); return channel_readl(dwc, DAR); @@ -1225,99 +1211,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -/*----------------------------------------------------------------------*/ - -struct dw_dma_of_filter_args { - struct dw_dma *dw; - unsigned int req; - unsigned int src; - unsigned int dst; -}; - -static bool dw_dma_of_filter(struct dma_chan *chan, void *param) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma_of_filter_args *fargs = param; - - /* Ensure the device matches our channel */ - if (chan->device != &fargs->dw->dma) - return false; - - dwc->request_line = fargs->req; - dwc->src_master = fargs->src; - dwc->dst_master = fargs->dst; - - return true; -} - -static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, - struct of_dma *ofdma) -{ - struct dw_dma *dw = ofdma->of_dma_data; - struct dw_dma_of_filter_args fargs = { - .dw = dw, - }; - dma_cap_mask_t cap; - - if (dma_spec->args_count != 3) - return NULL; - - fargs.req = dma_spec->args[0]; - fargs.src = dma_spec->args[1]; - fargs.dst = dma_spec->args[2]; - - if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || - fargs.src >= dw->nr_masters || - fargs.dst >= dw->nr_masters)) - return NULL; - - dma_cap_zero(cap); - dma_cap_set(DMA_SLAVE, cap); - - /* TODO: there should be a simpler way to do this */ - return dma_request_channel(cap, dw_dma_of_filter, &fargs); -} - -#ifdef CONFIG_ACPI -static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct acpi_dma_spec *dma_spec = param; - - if (chan->device->dev != dma_spec->dev || - chan->chan_id != dma_spec->chan_id) - return false; - - dwc->request_line = dma_spec->slave_id; - dwc->src_master = dwc_get_sms(NULL); - dwc->dst_master = dwc_get_dms(NULL); - - return true; -} - -static void dw_dma_acpi_controller_register(struct dw_dma *dw) -{ - struct device *dev = dw->dma.dev; - struct acpi_dma_filter_info *info; - int ret; - - info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); - if (!info) - return; - - dma_cap_zero(info->dma_cap); - dma_cap_set(DMA_SLAVE, info->dma_cap); - info->filter_fn = dw_dma_acpi_filter; - - ret = devm_acpi_dma_controller_register(dev, acpi_dma_simple_xlate, - info); - if (ret) - dev_err(dev, "could not register acpi_dma_controller\n"); -} -#else /* !CONFIG_ACPI */ -static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {} -#endif /* !CONFIG_ACPI */ - /* --------------------- Cyclic DMA API extensions -------------------- */ /** @@ -1598,104 +1491,24 @@ static void dw_dma_off(struct dw_dma *dw) dw->chan[i].initialized = false; } -#ifdef CONFIG_OF -static struct dw_dma_platform_data * -dw_dma_parse_dt(struct platform_device *pdev) +int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) { - struct device_node *np = pdev->dev.of_node; - struct dw_dma_platform_data *pdata; - u32 tmp, arr[4]; - - if (!np) { - dev_err(&pdev->dev, "Missing DT data\n"); - return NULL; - } - - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return NULL; - - if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) - return NULL; - - if (of_property_read_bool(np, "is_private")) - pdata->is_private = true; - - if (!of_property_read_u32(np, "chan_allocation_order", &tmp)) - pdata->chan_allocation_order = (unsigned char)tmp; - - if (!of_property_read_u32(np, "chan_priority", &tmp)) - pdata->chan_priority = tmp; - - if (!of_property_read_u32(np, "block_size", &tmp)) - pdata->block_size = tmp; - - if (!of_property_read_u32(np, "dma-masters", &tmp)) { - if (tmp > 4) - return NULL; - - pdata->nr_masters = tmp; - } - - if (!of_property_read_u32_array(np, "data_width", arr, - pdata->nr_masters)) - for (tmp = 0; tmp < pdata->nr_masters; tmp++) - pdata->data_width[tmp] = arr[tmp]; - - return pdata; -} -#else -static inline struct dw_dma_platform_data * -dw_dma_parse_dt(struct platform_device *pdev) -{ - return NULL; -} -#endif - -static int dw_probe(struct platform_device *pdev) -{ - struct dw_dma_platform_data *pdata; - struct resource *io; struct dw_dma *dw; size_t size; - void __iomem *regs; bool autocfg; unsigned int dw_params; unsigned int nr_channels; unsigned int max_blk_size = 0; - int irq; int err; int i; - io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!io) - return -EINVAL; - - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - regs = devm_ioremap_resource(&pdev->dev, io); - if (IS_ERR(regs)) - return PTR_ERR(regs); - - /* Apply default dma_mask if needed */ - if (!pdev->dev.dma_mask) { - pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; - pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - } - - dw_params = dma_read_byaddr(regs, DW_PARAMS); + dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); autocfg = dw_params >> DW_PARAMS_EN & 0x1; - dev_dbg(&pdev->dev, "DW_PARAMS: 0x%08x\n", dw_params); - - pdata = dev_get_platdata(&pdev->dev); - if (!pdata) - pdata = dw_dma_parse_dt(pdev); + dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); if (!pdata && autocfg) { - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; @@ -1712,16 +1525,17 @@ static int dw_probe(struct platform_device *pdev) nr_channels = pdata->nr_channels; size = sizeof(struct dw_dma) + nr_channels * sizeof(struct dw_dma_chan); - dw = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + dw = devm_kzalloc(chip->dev, size, GFP_KERNEL); if (!dw) return -ENOMEM; - dw->clk = devm_clk_get(&pdev->dev, "hclk"); + dw->clk = devm_clk_get(chip->dev, "hclk"); if (IS_ERR(dw->clk)) return PTR_ERR(dw->clk); clk_prepare_enable(dw->clk); - dw->regs = regs; + dw->regs = chip->regs; + chip->dw = dw; /* Get hardware configuration parameters */ if (autocfg) { @@ -1746,18 +1560,16 @@ static int dw_probe(struct platform_device *pdev) /* Disable BLOCK interrupts as well */ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - err = devm_request_irq(&pdev->dev, irq, dw_dma_interrupt, 0, + err = devm_request_irq(chip->dev, chip->irq, dw_dma_interrupt, 0, "dw_dmac", dw); if (err) return err; - platform_set_drvdata(pdev, dw); - /* Create a pool of consistent memory blocks for hardware descriptors */ - dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", &pdev->dev, + dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); if (!dw->desc_pool) { - dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + dev_err(chip->dev, "No memory for descriptors dma pool\n"); return -ENOMEM; } @@ -1798,12 +1610,12 @@ static int dw_probe(struct platform_device *pdev) /* Hardware configuration */ if (autocfg) { unsigned int dwc_params; + void __iomem *addr = chip->regs + r * sizeof(u32); - dwc_params = dma_read_byaddr(regs + r * sizeof(u32), - DWC_PARAMS); + dwc_params = dma_read_byaddr(addr, DWC_PARAMS); - dev_dbg(&pdev->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, - dwc_params); + dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, + dwc_params); /* Decode maximum block size for given channel. The * stored 4 bit value represents blocks from 0x00 for 3 @@ -1834,7 +1646,7 @@ static int dw_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); if (pdata->is_private) dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); - dw->dma.dev = &pdev->dev; + dw->dma.dev = chip->dev; dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; dw->dma.device_free_chan_resources = dwc_free_chan_resources; @@ -1848,32 +1660,20 @@ static int dw_probe(struct platform_device *pdev) dma_writel(dw, CFG, DW_CFG_DMA_EN); - dev_info(&pdev->dev, "DesignWare DMA Controller, %d channels\n", + dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n", nr_channels); dma_async_device_register(&dw->dma); - if (pdev->dev.of_node) { - err = of_dma_controller_register(pdev->dev.of_node, - dw_dma_of_xlate, dw); - if (err) - dev_err(&pdev->dev, - "could not register of_dma_controller\n"); - } - - if (ACPI_HANDLE(&pdev->dev)) - dw_dma_acpi_controller_register(dw); - return 0; } +EXPORT_SYMBOL_GPL(dw_dma_probe); -static int dw_remove(struct platform_device *pdev) +int dw_dma_remove(struct dw_dma_chip *chip) { - struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma *dw = chip->dw; struct dw_dma_chan *dwc, *_dwc; - if (pdev->dev.of_node) - of_dma_controller_free(pdev->dev.of_node); dw_dma_off(dw); dma_async_device_unregister(&dw->dma); @@ -1887,86 +1687,44 @@ static int dw_remove(struct platform_device *pdev) return 0; } +EXPORT_SYMBOL_GPL(dw_dma_remove); -static void dw_shutdown(struct platform_device *pdev) +void dw_dma_shutdown(struct dw_dma_chip *chip) { - struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma *dw = chip->dw; dw_dma_off(dw); clk_disable_unprepare(dw->clk); } +EXPORT_SYMBOL_GPL(dw_dma_shutdown); + +#ifdef CONFIG_PM_SLEEP -static int dw_suspend_noirq(struct device *dev) +int dw_dma_suspend(struct dw_dma_chip *chip) { - struct platform_device *pdev = to_platform_device(dev); - struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma *dw = chip->dw; dw_dma_off(dw); clk_disable_unprepare(dw->clk); return 0; } +EXPORT_SYMBOL_GPL(dw_dma_suspend); -static int dw_resume_noirq(struct device *dev) +int dw_dma_resume(struct dw_dma_chip *chip) { - struct platform_device *pdev = to_platform_device(dev); - struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma *dw = chip->dw; clk_prepare_enable(dw->clk); dma_writel(dw, CFG, DW_CFG_DMA_EN); return 0; } +EXPORT_SYMBOL_GPL(dw_dma_resume); -static const struct dev_pm_ops dw_dev_pm_ops = { - .suspend_noirq = dw_suspend_noirq, - .resume_noirq = dw_resume_noirq, - .freeze_noirq = dw_suspend_noirq, - .thaw_noirq = dw_resume_noirq, - .restore_noirq = dw_resume_noirq, - .poweroff_noirq = dw_suspend_noirq, -}; - -#ifdef CONFIG_OF -static const struct of_device_id dw_dma_of_id_table[] = { - { .compatible = "snps,dma-spear1340" }, - {} -}; -MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); -#endif - -#ifdef CONFIG_ACPI -static const struct acpi_device_id dw_dma_acpi_id_table[] = { - { "INTL9C60", 0 }, - { } -}; -#endif - -static struct platform_driver dw_driver = { - .probe = dw_probe, - .remove = dw_remove, - .shutdown = dw_shutdown, - .driver = { - .name = "dw_dmac", - .pm = &dw_dev_pm_ops, - .of_match_table = of_match_ptr(dw_dma_of_id_table), - .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table), - }, -}; - -static int __init dw_init(void) -{ - return platform_driver_register(&dw_driver); -} -subsys_initcall(dw_init); - -static void __exit dw_exit(void) -{ - platform_driver_unregister(&dw_driver); -} -module_exit(dw_exit); +#endif /* CONFIG_PM_SLEEP */ MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>"); diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h new file mode 100644 index 000000000000..32667f9e0dda --- /dev/null +++ b/drivers/dma/dw/internal.h @@ -0,0 +1,70 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2013 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _DW_DMAC_INTERNAL_H +#define _DW_DMAC_INTERNAL_H + +#include <linux/device.h> +#include <linux/dw_dmac.h> + +#include "regs.h" + +/** + * struct dw_dma_chip - representation of DesignWare DMA controller hardware + * @dev: struct device of the DMA controller + * @irq: irq line + * @regs: memory mapped I/O space + * @dw: struct dw_dma that is filed by dw_dma_probe() + */ +struct dw_dma_chip { + struct device *dev; + int irq; + void __iomem *regs; + struct dw_dma *dw; +}; + +/* Export to the platform drivers */ +int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata); +int dw_dma_remove(struct dw_dma_chip *chip); + +void dw_dma_shutdown(struct dw_dma_chip *chip); + +#ifdef CONFIG_PM_SLEEP + +int dw_dma_suspend(struct dw_dma_chip *chip); +int dw_dma_resume(struct dw_dma_chip *chip); + +#endif /* CONFIG_PM_SLEEP */ + +/** + * dwc_get_dms - get destination master + * @slave: pointer to the custom slave configuration + * + * Returns destination master in the custom slave configuration if defined, or + * default value otherwise. + */ +static inline unsigned int dwc_get_dms(struct dw_dma_slave *slave) +{ + return slave ? slave->dst_master : 0; +} + +/** + * dwc_get_sms - get source master + * @slave: pointer to the custom slave configuration + * + * Returns source master in the custom slave configuration if defined, or + * default value otherwise. + */ +static inline unsigned int dwc_get_sms(struct dw_dma_slave *slave) +{ + return slave ? slave->src_master : 1; +} + +#endif /* _DW_DMAC_INTERNAL_H */ diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c new file mode 100644 index 000000000000..e89fc24b8293 --- /dev/null +++ b/drivers/dma/dw/pci.c @@ -0,0 +1,101 @@ +/* + * PCI driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2013 Intel Corporation + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/device.h> + +#include "internal.h" + +static struct dw_dma_platform_data dw_pci_pdata = { + .is_private = 1, + .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, + .chan_priority = CHAN_PRIORITY_ASCENDING, +}; + +static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) +{ + struct dw_dma_chip *chip; + struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + return ret; + } + + pci_set_master(pdev); + pci_try_set_mwi(pdev); + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->dev = &pdev->dev; + chip->regs = pcim_iomap_table(pdev)[0]; + chip->irq = pdev->irq; + + ret = dw_dma_probe(chip, pdata); + if (ret) + return ret; + + pci_set_drvdata(pdev, chip); + + return 0; +} + +static void dw_pci_remove(struct pci_dev *pdev) +{ + struct dw_dma_chip *chip = pci_get_drvdata(pdev); + int ret; + + ret = dw_dma_remove(chip); + if (ret) + dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret); +} + +static DEFINE_PCI_DEVICE_TABLE(dw_pci_id_table) = { + /* Medfield */ + { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x0830), (kernel_ulong_t)&dw_pci_pdata }, + + /* BayTrail */ + { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_pdata }, + { } +}; +MODULE_DEVICE_TABLE(pci, dw_pci_id_table); + +static struct pci_driver dw_pci_driver = { + .name = "dw_dmac_pci", + .id_table = dw_pci_id_table, + .probe = dw_pci_probe, + .remove = dw_pci_remove, +}; + +module_pci_driver(dw_pci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller PCI driver"); +MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>"); diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c new file mode 100644 index 000000000000..6c9449cffae8 --- /dev/null +++ b/drivers/dma/dw/platform.c @@ -0,0 +1,317 @@ +/* + * Platform driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * Copyright (C) 2013 Intel Corporation + * + * Some parts of this driver are derived from the original dw_dmac. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/clk.h> +#include <linux/platform_device.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/acpi.h> +#include <linux/acpi_dma.h> + +#include "internal.h" + +struct dw_dma_of_filter_args { + struct dw_dma *dw; + unsigned int req; + unsigned int src; + unsigned int dst; +}; + +static bool dw_dma_of_filter(struct dma_chan *chan, void *param) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma_of_filter_args *fargs = param; + + /* Ensure the device matches our channel */ + if (chan->device != &fargs->dw->dma) + return false; + + dwc->request_line = fargs->req; + dwc->src_master = fargs->src; + dwc->dst_master = fargs->dst; + + return true; +} + +static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dw_dma *dw = ofdma->of_dma_data; + struct dw_dma_of_filter_args fargs = { + .dw = dw, + }; + dma_cap_mask_t cap; + + if (dma_spec->args_count != 3) + return NULL; + + fargs.req = dma_spec->args[0]; + fargs.src = dma_spec->args[1]; + fargs.dst = dma_spec->args[2]; + + if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || + fargs.src >= dw->nr_masters || + fargs.dst >= dw->nr_masters)) + return NULL; + + dma_cap_zero(cap); + dma_cap_set(DMA_SLAVE, cap); + + /* TODO: there should be a simpler way to do this */ + return dma_request_channel(cap, dw_dma_of_filter, &fargs); +} + +#ifdef CONFIG_ACPI +static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct acpi_dma_spec *dma_spec = param; + + if (chan->device->dev != dma_spec->dev || + chan->chan_id != dma_spec->chan_id) + return false; + + dwc->request_line = dma_spec->slave_id; + dwc->src_master = dwc_get_sms(NULL); + dwc->dst_master = dwc_get_dms(NULL); + + return true; +} + +static void dw_dma_acpi_controller_register(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + struct acpi_dma_filter_info *info; + int ret; + + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); + if (!info) + return; + + dma_cap_zero(info->dma_cap); + dma_cap_set(DMA_SLAVE, info->dma_cap); + info->filter_fn = dw_dma_acpi_filter; + + ret = devm_acpi_dma_controller_register(dev, acpi_dma_simple_xlate, + info); + if (ret) + dev_err(dev, "could not register acpi_dma_controller\n"); +} +#else /* !CONFIG_ACPI */ +static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {} +#endif /* !CONFIG_ACPI */ + +#ifdef CONFIG_OF +static struct dw_dma_platform_data * +dw_dma_parse_dt(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct dw_dma_platform_data *pdata; + u32 tmp, arr[4]; + + if (!np) { + dev_err(&pdev->dev, "Missing DT data\n"); + return NULL; + } + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return NULL; + + if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) + return NULL; + + if (of_property_read_bool(np, "is_private")) + pdata->is_private = true; + + if (!of_property_read_u32(np, "chan_allocation_order", &tmp)) + pdata->chan_allocation_order = (unsigned char)tmp; + + if (!of_property_read_u32(np, "chan_priority", &tmp)) + pdata->chan_priority = tmp; + + if (!of_property_read_u32(np, "block_size", &tmp)) + pdata->block_size = tmp; + + if (!of_property_read_u32(np, "dma-masters", &tmp)) { + if (tmp > 4) + return NULL; + + pdata->nr_masters = tmp; + } + + if (!of_property_read_u32_array(np, "data_width", arr, + pdata->nr_masters)) + for (tmp = 0; tmp < pdata->nr_masters; tmp++) + pdata->data_width[tmp] = arr[tmp]; + + return pdata; +} +#else +static inline struct dw_dma_platform_data * +dw_dma_parse_dt(struct platform_device *pdev) +{ + return NULL; +} +#endif + +static int dw_probe(struct platform_device *pdev) +{ + struct dw_dma_chip *chip; + struct device *dev = &pdev->dev; + struct resource *mem; + struct dw_dma_platform_data *pdata; + int err; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + chip->regs = devm_ioremap_resource(dev, mem); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + /* Apply default dma_mask if needed */ + if (!dev->dma_mask) { + dev->dma_mask = &dev->coherent_dma_mask; + dev->coherent_dma_mask = DMA_BIT_MASK(32); + } + + pdata = dev_get_platdata(dev); + if (!pdata) + pdata = dw_dma_parse_dt(pdev); + + chip->dev = dev; + + err = dw_dma_probe(chip, pdata); + if (err) + return err; + + platform_set_drvdata(pdev, chip); + + if (pdev->dev.of_node) { + err = of_dma_controller_register(pdev->dev.of_node, + dw_dma_of_xlate, chip->dw); + if (err) + dev_err(&pdev->dev, + "could not register of_dma_controller\n"); + } + + if (ACPI_HANDLE(&pdev->dev)) + dw_dma_acpi_controller_register(chip->dw); + + return 0; +} + +static int dw_remove(struct platform_device *pdev) +{ + struct dw_dma_chip *chip = platform_get_drvdata(pdev); + + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + + return dw_dma_remove(chip); +} + +static void dw_shutdown(struct platform_device *pdev) +{ + struct dw_dma_chip *chip = platform_get_drvdata(pdev); + + dw_dma_shutdown(chip); +} + +#ifdef CONFIG_OF +static const struct of_device_id dw_dma_of_id_table[] = { + { .compatible = "snps,dma-spear1340" }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id dw_dma_acpi_id_table[] = { + { "INTL9C60", 0 }, + { } +}; +#endif + +#ifdef CONFIG_PM_SLEEP + +static int dw_suspend_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dma_chip *chip = platform_get_drvdata(pdev); + + return dw_dma_suspend(chip); +} + +static int dw_resume_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dma_chip *chip = platform_get_drvdata(pdev); + + return dw_dma_resume(chip); +} + +#else /* !CONFIG_PM_SLEEP */ + +#define dw_suspend_noirq NULL +#define dw_resume_noirq NULL + +#endif /* !CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops dw_dev_pm_ops = { + .suspend_noirq = dw_suspend_noirq, + .resume_noirq = dw_resume_noirq, + .freeze_noirq = dw_suspend_noirq, + .thaw_noirq = dw_resume_noirq, + .restore_noirq = dw_resume_noirq, + .poweroff_noirq = dw_suspend_noirq, +}; + +static struct platform_driver dw_driver = { + .probe = dw_probe, + .remove = dw_remove, + .shutdown = dw_shutdown, + .driver = { + .name = "dw_dmac", + .pm = &dw_dev_pm_ops, + .of_match_table = of_match_ptr(dw_dma_of_id_table), + .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table), + }, +}; + +static int __init dw_init(void) +{ + return platform_driver_register(&dw_driver); +} +subsys_initcall(dw_init); + +static void __exit dw_exit(void) +{ + platform_driver_unregister(&dw_driver); +} +module_exit(dw_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver"); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw/regs.h index 9d417200bd57..deb4274f80f4 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw/regs.h @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ +#include <linux/interrupt.h> #include <linux/dmaengine.h> #include <linux/dw_dmac.h> @@ -100,6 +101,12 @@ struct dw_dma_regs { u32 DW_PARAMS; }; +/* + * Big endian I/O access when reading and writing to the DMA controller + * registers. This is needed on some platforms, like the Atmel AVR32 + * architecture. + */ + #ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO #define dma_readl_native ioread32be #define dma_writel_native iowrite32be diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 4fc2980556ad..49e8fbdb8983 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -1368,7 +1368,7 @@ static int fsldma_of_probe(struct platform_device *op) dma_set_mask(&(op->dev), DMA_BIT_MASK(36)); - dev_set_drvdata(&op->dev, fdev); + platform_set_drvdata(op, fdev); /* * We cannot use of_platform_bus_probe() because there is no @@ -1417,7 +1417,7 @@ static int fsldma_of_remove(struct platform_device *op) struct fsldma_device *fdev; unsigned int i; - fdev = dev_get_drvdata(&op->dev); + fdev = platform_get_drvdata(op); dma_async_device_unregister(&fdev->common); fsldma_free_irqs(fdev); @@ -1428,7 +1428,6 @@ static int fsldma_of_remove(struct platform_device *op) } iounmap(fdev->regs); - dev_set_drvdata(&op->dev, NULL); kfree(fdev); return 0; diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index f28583370d00..ff2aab973b45 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -27,6 +27,8 @@ #include <linux/clk.h> #include <linux/dmaengine.h> #include <linux/module.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> #include <asm/irq.h> #include <linux/platform_data/dma-imx.h> @@ -186,6 +188,11 @@ struct imxdma_engine { enum imx_dma_type devtype; }; +struct imxdma_filter_data { + struct imxdma_engine *imxdma; + int request; +}; + static struct platform_device_id imx_dma_devtype[] = { { .name = "imx1-dma", @@ -202,6 +209,22 @@ static struct platform_device_id imx_dma_devtype[] = { }; MODULE_DEVICE_TABLE(platform, imx_dma_devtype); +static const struct of_device_id imx_dma_of_dev_id[] = { + { + .compatible = "fsl,imx1-dma", + .data = &imx_dma_devtype[IMX1_DMA], + }, { + .compatible = "fsl,imx21-dma", + .data = &imx_dma_devtype[IMX21_DMA], + }, { + .compatible = "fsl,imx27-dma", + .data = &imx_dma_devtype[IMX27_DMA], + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, imx_dma_of_dev_id); + static inline int is_imx1_dma(struct imxdma_engine *imxdma) { return imxdma->devtype == IMX1_DMA; @@ -996,17 +1019,55 @@ static void imxdma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&imxdma->lock, flags); } +static bool imxdma_filter_fn(struct dma_chan *chan, void *param) +{ + struct imxdma_filter_data *fdata = param; + struct imxdma_channel *imxdma_chan = to_imxdma_chan(chan); + + if (chan->device->dev != fdata->imxdma->dev) + return false; + + imxdma_chan->dma_request = fdata->request; + chan->private = NULL; + + return true; +} + +static struct dma_chan *imxdma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + int count = dma_spec->args_count; + struct imxdma_engine *imxdma = ofdma->of_dma_data; + struct imxdma_filter_data fdata = { + .imxdma = imxdma, + }; + + if (count != 1) + return NULL; + + fdata.request = dma_spec->args[0]; + + return dma_request_channel(imxdma->dma_device.cap_mask, + imxdma_filter_fn, &fdata); +} + static int __init imxdma_probe(struct platform_device *pdev) { struct imxdma_engine *imxdma; struct resource *res; + const struct of_device_id *of_id; int ret, i; int irq, irq_err; + of_id = of_match_device(imx_dma_of_dev_id, &pdev->dev); + if (of_id) + pdev->id_entry = of_id->data; + imxdma = devm_kzalloc(&pdev->dev, sizeof(*imxdma), GFP_KERNEL); if (!imxdma) return -ENOMEM; + imxdma->dev = &pdev->dev; imxdma->devtype = pdev->id_entry->driver_data; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1111,7 +1172,6 @@ static int __init imxdma_probe(struct platform_device *pdev) &imxdma->dma_device.channels); } - imxdma->dev = &pdev->dev; imxdma->dma_device.dev = &pdev->dev; imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources; @@ -1136,8 +1196,19 @@ static int __init imxdma_probe(struct platform_device *pdev) goto err; } + if (pdev->dev.of_node) { + ret = of_dma_controller_register(pdev->dev.of_node, + imxdma_xlate, imxdma); + if (ret) { + dev_err(&pdev->dev, "unable to register of_dma_controller\n"); + goto err_of_dma_controller; + } + } + return 0; +err_of_dma_controller: + dma_async_device_unregister(&imxdma->dma_device); err: clk_disable_unprepare(imxdma->dma_ipg); clk_disable_unprepare(imxdma->dma_ahb); @@ -1150,6 +1221,9 @@ static int imxdma_remove(struct platform_device *pdev) dma_async_device_unregister(&imxdma->dma_device); + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); + clk_disable_unprepare(imxdma->dma_ipg); clk_disable_unprepare(imxdma->dma_ahb); @@ -1159,6 +1233,7 @@ static int imxdma_remove(struct platform_device *pdev) static struct platform_driver imxdma_driver = { .driver = { .name = "imx-dma", + .of_match_table = imx_dma_of_dev_id, }, .id_table = imx_dma_devtype, .remove = imxdma_remove, diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 092867bf795c..1e44b8cf95da 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -36,6 +36,7 @@ #include <linux/dmaengine.h> #include <linux/of.h> #include <linux/of_device.h> +#include <linux/of_dma.h> #include <asm/irq.h> #include <linux/platform_data/dma-imx-sdma.h> @@ -1296,6 +1297,35 @@ err_dma_alloc: return ret; } +static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param) +{ + struct imx_dma_data *data = fn_param; + + if (!imx_dma_is_general_purpose(chan)) + return false; + + chan->private = data; + + return true; +} + +static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sdma_engine *sdma = ofdma->of_dma_data; + dma_cap_mask_t mask = sdma->dma_device.cap_mask; + struct imx_dma_data data; + + if (dma_spec->args_count != 3) + return NULL; + + data.dma_request = dma_spec->args[0]; + data.peripheral_type = dma_spec->args[1]; + data.priority = dma_spec->args[2]; + + return dma_request_channel(mask, sdma_filter_fn, &data); +} + static int __init sdma_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -1443,10 +1473,20 @@ static int __init sdma_probe(struct platform_device *pdev) goto err_init; } + if (np) { + ret = of_dma_controller_register(np, sdma_xlate, sdma); + if (ret) { + dev_err(&pdev->dev, "failed to register controller\n"); + goto err_register; + } + } + dev_info(sdma->dev, "initialized\n"); return 0; +err_register: + dma_async_device_unregister(&sdma->dma_device); err_init: kfree(sdma->script_addrs); err_alloc: diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index 43d5a6c33297..9b9366537d73 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -154,6 +154,10 @@ static void mmp_tdma_disable_chan(struct mmp_tdma_chan *tdmac) { writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN, tdmac->reg_base + TDCR); + + /* disable irq */ + writel(0, tdmac->reg_base + TDIMR); + tdmac->status = DMA_SUCCESS; } diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index b48a79c28845..719593002ab7 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -693,7 +693,7 @@ static bool mxs_dma_filter_fn(struct dma_chan *chan, void *fn_param) return true; } -struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, +static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct mxs_dma_engine *mxs_dma = ofdma->of_dma_data; diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index 7aa0864cd487..75334bdd2c56 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -35,8 +35,7 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) struct of_dma *ofdma; list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers) - if ((ofdma->of_node == dma_spec->np) && - (ofdma->of_dma_nbcells == dma_spec->args_count)) + if (ofdma->of_node == dma_spec->np) return ofdma; pr_debug("%s: can't find DMA controller %s\n", __func__, @@ -64,8 +63,6 @@ int of_dma_controller_register(struct device_node *np, void *data) { struct of_dma *ofdma; - int nbcells; - const __be32 *prop; if (!np || !of_dma_xlate) { pr_err("%s: not enough information provided\n", __func__); @@ -76,19 +73,7 @@ int of_dma_controller_register(struct device_node *np, if (!ofdma) return -ENOMEM; - prop = of_get_property(np, "#dma-cells", NULL); - if (prop) - nbcells = be32_to_cpup(prop); - - if (!prop || !nbcells) { - pr_err("%s: #dma-cells property is missing or invalid\n", - __func__); - kfree(ofdma); - return -EINVAL; - } - ofdma->of_node = np; - ofdma->of_dma_nbcells = nbcells; ofdma->of_dma_xlate = of_dma_xlate; ofdma->of_dma_data = data; diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 7ec82f0667eb..593827b3fdd4 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -157,7 +157,6 @@ enum pl330_reqtype { #define PERIPH_REV_R0P0 0 #define PERIPH_REV_R1P0 1 #define PERIPH_REV_R1P1 2 -#define PCELL_ID 0xff0 #define CR0_PERIPH_REQ_SET (1 << 0) #define CR0_BOOT_EN_SET (1 << 1) @@ -193,8 +192,6 @@ enum pl330_reqtype { #define INTEG_CFG 0x0 #define PERIPH_ID_VAL ((PART << 0) | (DESIGNER << 12)) -#define PCELL_ID_VAL 0xb105f00d - #define PL330_STATE_STOPPED (1 << 0) #define PL330_STATE_EXECUTING (1 << 1) #define PL330_STATE_WFE (1 << 2) @@ -292,7 +289,6 @@ static unsigned cmd_line; /* Populated by the PL330 core driver for DMA API driver's info */ struct pl330_config { u32 periph_id; - u32 pcell_id; #define DMAC_MODE_NS (1 << 0) unsigned int mode; unsigned int data_bus_width:10; /* In number of bits */ @@ -505,7 +501,7 @@ struct pl330_dmac { /* Maximum possible events/irqs */ int events[32]; /* BUS address of MicroCode buffer */ - u32 mcode_bus; + dma_addr_t mcode_bus; /* CPU address of MicroCode buffer */ void *mcode_cpu; /* List of all Channel threads */ @@ -650,19 +646,6 @@ static inline bool _manager_ns(struct pl330_thread *thrd) return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false; } -static inline u32 get_id(struct pl330_info *pi, u32 off) -{ - void __iomem *regs = pi->base; - u32 id = 0; - - id |= (readb(regs + off + 0x0) << 0); - id |= (readb(regs + off + 0x4) << 8); - id |= (readb(regs + off + 0x8) << 16); - id |= (readb(regs + off + 0xc) << 24); - - return id; -} - static inline u32 get_revision(u32 periph_id) { return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK; @@ -1986,9 +1969,6 @@ static void read_dmac_config(struct pl330_info *pi) pi->pcfg.num_events = val; pi->pcfg.irq_ns = readl(regs + CR3); - - pi->pcfg.periph_id = get_id(pi, PERIPH_ID); - pi->pcfg.pcell_id = get_id(pi, PCELL_ID); } static inline void _reset_thread(struct pl330_thread *thrd) @@ -2098,10 +2078,8 @@ static int pl330_add(struct pl330_info *pi) regs = pi->base; /* Check if we can handle this DMAC */ - if ((get_id(pi, PERIPH_ID) & 0xfffff) != PERIPH_ID_VAL - || get_id(pi, PCELL_ID) != PCELL_ID_VAL) { - dev_err(pi->dev, "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n", - get_id(pi, PERIPH_ID), get_id(pi, PCELL_ID)); + if ((pi->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) { + dev_err(pi->dev, "PERIPH_ID 0x%x !\n", pi->pcfg.periph_id); return -EINVAL; } @@ -2916,6 +2894,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) if (ret) return ret; + pi->pcfg.periph_id = adev->periphid; ret = pl330_add(pi); if (ret) goto probe_err1; diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 1e220f8dfd8c..370ff8265630 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -4434,7 +4434,7 @@ static int ppc440spe_adma_probe(struct platform_device *ofdev) adev->dev = &ofdev->dev; adev->common.dev = &ofdev->dev; INIT_LIST_HEAD(&adev->common.channels); - dev_set_drvdata(&ofdev->dev, adev); + platform_set_drvdata(ofdev, adev); /* create a channel */ chan = kzalloc(sizeof(*chan), GFP_KERNEL); @@ -4547,14 +4547,13 @@ out: */ static int ppc440spe_adma_remove(struct platform_device *ofdev) { - struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev); + struct ppc440spe_adma_device *adev = platform_get_drvdata(ofdev); struct device_node *np = ofdev->dev.of_node; struct resource res; struct dma_chan *chan, *_chan; struct ppc_dma_chan_ref *ref, *_ref; struct ppc440spe_adma_chan *ppc440spe_chan; - dev_set_drvdata(&ofdev->dev, NULL); if (adev->id < PPC440SPE_ADMA_ENGINES_NUM) ppc440spe_adma_devices[adev->id] = -1; diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile index c07ca4612e46..c962138dde96 100644 --- a/drivers/dma/sh/Makefile +++ b/drivers/dma/sh/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o +obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o shdma-of.o obj-$(CONFIG_SH_DMAE) += shdma.o obj-$(CONFIG_SUDMAC) += sudmac.o diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 4acb85a10250..28ca36121631 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -175,7 +175,18 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id) { struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); const struct shdma_ops *ops = sdev->ops; - int ret; + int ret, match; + + if (schan->dev->of_node) { + match = schan->hw_req; + ret = ops->set_slave(schan, match, true); + if (ret < 0) + return ret; + + slave_id = schan->slave_id; + } else { + match = slave_id; + } if (slave_id < 0 || slave_id >= slave_num) return -EINVAL; @@ -183,7 +194,7 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id) if (test_and_set_bit(slave_id, shdma_slave_used)) return -EBUSY; - ret = ops->set_slave(schan, slave_id, false); + ret = ops->set_slave(schan, match, false); if (ret < 0) { clear_bit(slave_id, shdma_slave_used); return ret; @@ -206,23 +217,26 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id) * services would have to provide their own filters, which first would check * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do * this, and only then, in case of a match, call this common filter. + * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate(). + * In that case the MID-RID value is used for slave channel filtering and is + * passed to this function in the "arg" parameter. */ bool shdma_chan_filter(struct dma_chan *chan, void *arg) { struct shdma_chan *schan = to_shdma_chan(chan); struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); const struct shdma_ops *ops = sdev->ops; - int slave_id = (int)arg; + int match = (int)arg; int ret; - if (slave_id < 0) + if (match < 0) /* No slave requested - arbitrary channel */ return true; - if (slave_id >= slave_num) + if (!schan->dev->of_node && match >= slave_num) return false; - ret = ops->set_slave(schan, slave_id, true); + ret = ops->set_slave(schan, match, true); if (ret < 0) return false; diff --git a/drivers/dma/sh/shdma-of.c b/drivers/dma/sh/shdma-of.c new file mode 100644 index 000000000000..11bcb05cd79c --- /dev/null +++ b/drivers/dma/sh/shdma-of.c @@ -0,0 +1,82 @@ +/* + * SHDMA Device Tree glue + * + * Copyright (C) 2013 Renesas Electronics Inc. + * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de> + * + * This is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + */ + +#include <linux/dmaengine.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/shdma-base.h> + +#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan) + +static struct dma_chan *shdma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + u32 id = dma_spec->args[0]; + dma_cap_mask_t mask; + struct dma_chan *chan; + + if (dma_spec->args_count != 1) + return NULL; + + dma_cap_zero(mask); + /* Only slave DMA channels can be allocated via DT */ + dma_cap_set(DMA_SLAVE, mask); + + chan = dma_request_channel(mask, shdma_chan_filter, (void *)id); + if (chan) + to_shdma_chan(chan)->hw_req = id; + + return chan; +} + +static int shdma_of_probe(struct platform_device *pdev) +{ + const struct of_dev_auxdata *lookup = pdev->dev.platform_data; + int ret; + + if (!lookup) + return -EINVAL; + + ret = of_dma_controller_register(pdev->dev.of_node, + shdma_of_xlate, pdev); + if (ret < 0) + return ret; + + ret = of_platform_populate(pdev->dev.of_node, NULL, lookup, &pdev->dev); + if (ret < 0) + of_dma_controller_free(pdev->dev.of_node); + + return ret; +} + +static const struct of_device_id shdma_of_match[] = { + { .compatible = "renesas,shdma-mux", }, + { } +}; +MODULE_DEVICE_TABLE(of, sh_dmae_of_match); + +static struct platform_driver shdma_of = { + .driver = { + .owner = THIS_MODULE, + .name = "shdma-of", + .of_match_table = shdma_of_match, + }, + .probe = shdma_of_probe, +}; + +module_platform_driver(shdma_of); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("SH-DMA driver DT glue"); +MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>"); diff --git a/drivers/dma/sh/shdma.c b/drivers/dma/sh/shdma.c index b70709b030d8..b67f45f5c271 100644 --- a/drivers/dma/sh/shdma.c +++ b/drivers/dma/sh/shdma.c @@ -301,20 +301,32 @@ static void sh_dmae_setup_xfer(struct shdma_chan *schan, } } +/* + * Find a slave channel configuration from the contoller list by either a slave + * ID in the non-DT case, or by a MID/RID value in the DT case + */ static const struct sh_dmae_slave_config *dmae_find_slave( - struct sh_dmae_chan *sh_chan, int slave_id) + struct sh_dmae_chan *sh_chan, int match) { struct sh_dmae_device *shdev = to_sh_dev(sh_chan); struct sh_dmae_pdata *pdata = shdev->pdata; const struct sh_dmae_slave_config *cfg; int i; - if (slave_id >= SH_DMA_SLAVE_NUMBER) - return NULL; + if (!sh_chan->shdma_chan.dev->of_node) { + if (match >= SH_DMA_SLAVE_NUMBER) + return NULL; - for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) - if (cfg->slave_id == slave_id) - return cfg; + for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) + if (cfg->slave_id == match) + return cfg; + } else { + for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) + if (cfg->mid_rid == match) { + sh_chan->shdma_chan.slave_id = cfg->slave_id; + return cfg; + } + } return NULL; } @@ -729,7 +741,7 @@ static int sh_dmae_probe(struct platform_device *pdev) goto eshdma; /* platform data */ - shdev->pdata = pdev->dev.platform_data; + shdev->pdata = pdata; if (pdata->chcr_offset) shdev->chcr_offset = pdata->chcr_offset; @@ -920,11 +932,18 @@ static int sh_dmae_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id sh_dmae_of_match[] = { + { .compatible = "renesas,shdma", }, + { } +}; +MODULE_DEVICE_TABLE(of, sh_dmae_of_match); + static struct platform_driver sh_dmae_driver = { .driver = { .owner = THIS_MODULE, .pm = &sh_dmae_pm, .name = SH_DMAE_DRV_NAME, + .of_match_table = sh_dmae_of_match, }, .remove = sh_dmae_remove, .shutdown = sh_dmae_shutdown, diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index 1765a0a2736d..716b23e4f327 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -466,12 +466,29 @@ static enum dma_status sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) { + struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan); struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); unsigned long flags; enum dma_status ret; + struct sirfsoc_dma_desc *sdesc; + int cid = schan->chan.chan_id; + unsigned long dma_pos; + unsigned long dma_request_bytes; + unsigned long residue; spin_lock_irqsave(&schan->lock, flags); + + sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, + node); + dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * + (sdesc->width * SIRFSOC_DMA_WORD_LEN); + ret = dma_cookie_status(chan, cookie, txstate); + dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) + << 2; + residue = dma_request_bytes - (dma_pos - sdesc->addr); + dma_set_residue(txstate, residue); + spin_unlock_irqrestore(&schan->lock, flags); return ret; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 33f59ecd256e..f137914d7b16 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1191,6 +1191,7 @@ static void tegra_dma_free_chan_resources(struct dma_chan *dc) list_splice_init(&tdc->free_dma_desc, &dma_desc_list); INIT_LIST_HEAD(&tdc->cb_desc); tdc->config_init = false; + tdc->isr_handler = NULL; spin_unlock_irqrestore(&tdc->lock, flags); while (!list_empty(&dma_desc_list)) { @@ -1334,7 +1335,7 @@ static int tegra_dma_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "request_irq failed with err %d channel %d\n", - i, ret); + ret, i); goto err_irq; } diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 26107ba6edb3..0ef43c136aa7 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -811,8 +811,6 @@ static int td_remove(struct platform_device *pdev) kfree(td); release_mem_region(iomem->start, resource_size(iomem)); - platform_set_drvdata(pdev, NULL); - dev_dbg(&pdev->dev, "Removed...\n"); return 0; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 0aaece9107c7..315dcc6ec1f5 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -47,7 +47,7 @@ #define MTD_PARAM_LEN_MAX 64 /* Maximum number of comma-separated items in the 'mtd=' parameter */ -#define MTD_PARAM_MAX_COUNT 3 +#define MTD_PARAM_MAX_COUNT 4 /* Maximum value for the number of bad PEBs per 1024 PEBs */ #define MAX_MTD_UBI_BEB_LIMIT 768 @@ -67,6 +67,7 @@ */ struct mtd_dev_param { char name[MTD_PARAM_LEN_MAX]; + int ubi_num; int vid_hdr_offs; int max_beb_per1024; }; @@ -1261,11 +1262,15 @@ static int __init ubi_init(void) mtd = open_mtd_device(p->name); if (IS_ERR(mtd)) { err = PTR_ERR(mtd); - goto out_detach; + ubi_err("cannot open mtd %s, error %d", p->name, err); + /* See comment below re-ubi_is_module(). */ + if (ubi_is_module()) + goto out_detach; + continue; } mutex_lock(&ubi_devices_mutex); - err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, + err = ubi_attach_mtd_dev(mtd, p->ubi_num, p->vid_hdr_offs, p->max_beb_per1024); mutex_unlock(&ubi_devices_mutex); if (err < 0) { @@ -1309,7 +1314,7 @@ out_version: out_class: class_destroy(ubi_class); out: - ubi_err("UBI error: cannot initialize UBI, error %d", err); + ubi_err("cannot initialize UBI, error %d", err); return err; } late_initcall(ubi_init); @@ -1346,7 +1351,7 @@ static int __init bytes_str_to_int(const char *str) result = simple_strtoul(str, &endp, 0); if (str == endp || result >= INT_MAX) { - ubi_err("UBI error: incorrect bytes count: \"%s\"\n", str); + ubi_err("incorrect bytes count: \"%s\"\n", str); return -EINVAL; } @@ -1362,7 +1367,7 @@ static int __init bytes_str_to_int(const char *str) case '\0': break; default: - ubi_err("UBI error: incorrect bytes count: \"%s\"\n", str); + ubi_err("incorrect bytes count: \"%s\"\n", str); return -EINVAL; } @@ -1383,20 +1388,20 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) struct mtd_dev_param *p; char buf[MTD_PARAM_LEN_MAX]; char *pbuf = &buf[0]; - char *tokens[MTD_PARAM_MAX_COUNT]; + char *tokens[MTD_PARAM_MAX_COUNT], *token; if (!val) return -EINVAL; if (mtd_devs == UBI_MAX_DEVICES) { - ubi_err("UBI error: too many parameters, max. is %d\n", + ubi_err("too many parameters, max. is %d\n", UBI_MAX_DEVICES); return -EINVAL; } len = strnlen(val, MTD_PARAM_LEN_MAX); if (len == MTD_PARAM_LEN_MAX) { - ubi_err("UBI error: parameter \"%s\" is too long, max. is %d\n", + ubi_err("parameter \"%s\" is too long, max. is %d\n", val, MTD_PARAM_LEN_MAX); return -EINVAL; } @@ -1416,44 +1421,60 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) tokens[i] = strsep(&pbuf, ","); if (pbuf) { - ubi_err("UBI error: too many arguments at \"%s\"\n", val); + ubi_err("too many arguments at \"%s\"\n", val); return -EINVAL; } p = &mtd_dev_param[mtd_devs]; strcpy(&p->name[0], tokens[0]); - if (tokens[1]) - p->vid_hdr_offs = bytes_str_to_int(tokens[1]); + token = tokens[1]; + if (token) { + p->vid_hdr_offs = bytes_str_to_int(token); - if (p->vid_hdr_offs < 0) - return p->vid_hdr_offs; + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; + } - if (tokens[2]) { - int err = kstrtoint(tokens[2], 10, &p->max_beb_per1024); + token = tokens[2]; + if (token) { + int err = kstrtoint(token, 10, &p->max_beb_per1024); if (err) { - ubi_err("UBI error: bad value for max_beb_per1024 parameter: %s", - tokens[2]); + ubi_err("bad value for max_beb_per1024 parameter: %s", + token); return -EINVAL; } } + token = tokens[3]; + if (token) { + int err = kstrtoint(token, 10, &p->ubi_num); + + if (err) { + ubi_err("bad value for ubi_num parameter: %s", token); + return -EINVAL; + } + } else + p->ubi_num = UBI_DEV_NUM_AUTO; + mtd_devs += 1; return 0; } module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); -MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024]].\n" +MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024[,ubi_num]]].\n" "Multiple \"mtd\" parameters may be specified.\n" "MTD devices may be specified by their number, name, or path to the MTD character device node.\n" "Optional \"vid_hdr_offs\" parameter specifies UBI VID header position to be used by UBI. (default value if 0)\n" "Optional \"max_beb_per1024\" parameter specifies the maximum expected bad eraseblock per 1024 eraseblocks. (default value (" __stringify(CONFIG_MTD_UBI_BEB_LIMIT) ") if 0)\n" + "Optional \"ubi_num\" parameter specifies UBI device number which have to be assigned to the newly created UBI device (assigned automatically by default)\n" "\n" "Example 1: mtd=/dev/mtd0 - attach MTD device /dev/mtd0.\n" "Example 2: mtd=content,1984 mtd=4 - attach MTD device with name \"content\" using VID header offset 1984, and MTD device number 4 with default VID header offset.\n" "Example 3: mtd=/dev/mtd1,0,25 - attach MTD device /dev/mtd1 using default VID header offset and reserve 25*nand_size_in_blocks/1024 erase blocks for bad block handling.\n" + "Example 4: mtd=/dev/mtd1,0,0,5 - attach MTD device /dev/mtd1 to UBI 5 and using default values for the other fields.\n" "\t(e.g. if the NAND *chipset* has 4096 PEB, 100 will be reserved for this UBI device)."); #ifdef CONFIG_MTD_UBI_FASTMAP module_param(fm_autoconvert, bool, 0644); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 0648c6996d43..154275182b4b 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -727,8 +727,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, aeb = NULL; list_for_each_entry(tmp_aeb, &used, u.list) { - if (tmp_aeb->pnum == pnum) + if (tmp_aeb->pnum == pnum) { aeb = tmp_aeb; + break; + } } /* This can happen if a PEB is already in an EBA known diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 9385b4524547..624e8dc24532 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -168,9 +168,6 @@ out_resume: dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); - /* Make sure timer events get retriggered on all CPUs */ - clock_was_set(); - out_thaw: #ifdef CONFIG_PREEMPT thaw_processes(); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 64e5323cbbb0..5e7c60c1cb63 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -387,7 +387,7 @@ static void remove_dir(struct dentry * d) if (d->d_inode) simple_rmdir(parent->d_inode,d); - pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); + pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d)); dput(parent); } diff --git a/fs/timerfd.c b/fs/timerfd.c index 32b644f03690..929312180dd0 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -8,6 +8,7 @@ * */ +#include <linux/alarmtimer.h> #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> @@ -26,7 +27,10 @@ #include <linux/rcupdate.h> struct timerfd_ctx { - struct hrtimer tmr; + union { + struct hrtimer tmr; + struct alarm alarm; + } t; ktime_t tintv; ktime_t moffs; wait_queue_head_t wqh; @@ -41,14 +45,19 @@ struct timerfd_ctx { static LIST_HEAD(cancel_list); static DEFINE_SPINLOCK(cancel_lock); +static inline bool isalarm(struct timerfd_ctx *ctx) +{ + return ctx->clockid == CLOCK_REALTIME_ALARM || + ctx->clockid == CLOCK_BOOTTIME_ALARM; +} + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, * tintv.tv64 != 0) until the timer is accessed. */ -static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) +static void timerfd_triggered(struct timerfd_ctx *ctx) { - struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); unsigned long flags; spin_lock_irqsave(&ctx->wqh.lock, flags); @@ -56,10 +65,25 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) ctx->ticks++; wake_up_locked(&ctx->wqh); spin_unlock_irqrestore(&ctx->wqh.lock, flags); +} +static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) +{ + struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, + t.tmr); + timerfd_triggered(ctx); return HRTIMER_NORESTART; } +static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, + ktime_t now) +{ + struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, + t.alarm); + timerfd_triggered(ctx); + return ALARMTIMER_NORESTART; +} + /* * Called when the clock was set to cancel the timers in the cancel * list. This will wake up processes waiting on these timers. The @@ -107,8 +131,9 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { - if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && - (flags & TFD_TIMER_CANCEL_ON_SET)) { + if ((ctx->clockid == CLOCK_REALTIME || + ctx->clockid == CLOCK_REALTIME_ALARM) && + (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); @@ -124,7 +149,11 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { ktime_t remaining; - remaining = hrtimer_expires_remaining(&ctx->tmr); + if (isalarm(ctx)) + remaining = alarm_expires_remaining(&ctx->t.alarm); + else + remaining = hrtimer_expires_remaining(&ctx->t.tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } @@ -142,11 +171,28 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, clockid, htmode); - hrtimer_set_expires(&ctx->tmr, texp); - ctx->tmr.function = timerfd_tmrproc; + + if (isalarm(ctx)) { + alarm_init(&ctx->t.alarm, + ctx->clockid == CLOCK_REALTIME_ALARM ? + ALARM_REALTIME : ALARM_BOOTTIME, + timerfd_alarmproc); + } else { + hrtimer_init(&ctx->t.tmr, clockid, htmode); + hrtimer_set_expires(&ctx->t.tmr, texp); + ctx->t.tmr.function = timerfd_tmrproc; + } + if (texp.tv64 != 0) { - hrtimer_start(&ctx->tmr, texp, htmode); + if (isalarm(ctx)) { + if (flags & TFD_TIMER_ABSTIME) + alarm_start(&ctx->t.alarm, texp); + else + alarm_start_relative(&ctx->t.alarm, texp); + } else { + hrtimer_start(&ctx->t.tmr, texp, htmode); + } + if (timerfd_canceled(ctx)) return -ECANCELED; } @@ -158,7 +204,11 @@ static int timerfd_release(struct inode *inode, struct file *file) struct timerfd_ctx *ctx = file->private_data; timerfd_remove_cancel(ctx); - hrtimer_cancel(&ctx->tmr); + + if (isalarm(ctx)) + alarm_cancel(&ctx->t.alarm); + else + hrtimer_cancel(&ctx->t.tmr); kfree_rcu(ctx, rcu); return 0; } @@ -215,9 +265,15 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, * callback to avoid DoS attacks specifying a very * short timer period. */ - ticks += hrtimer_forward_now(&ctx->tmr, - ctx->tintv) - 1; - hrtimer_restart(&ctx->tmr); + if (isalarm(ctx)) { + ticks += alarm_forward_now( + &ctx->t.alarm, ctx->tintv) - 1; + alarm_restart(&ctx->t.alarm); + } else { + ticks += hrtimer_forward_now(&ctx->t.tmr, + ctx->tintv) - 1; + hrtimer_restart(&ctx->t.tmr); + } } ctx->expired = 0; ctx->ticks = 0; @@ -259,7 +315,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) if ((flags & ~TFD_CREATE_FLAGS) || (clockid != CLOCK_MONOTONIC && - clockid != CLOCK_REALTIME)) + clockid != CLOCK_REALTIME && + clockid != CLOCK_REALTIME_ALARM && + clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -268,7 +326,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; - hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + + if (isalarm(ctx)) + alarm_init(&ctx->t.alarm, + ctx->clockid == CLOCK_REALTIME_ALARM ? + ALARM_REALTIME : ALARM_BOOTTIME, + timerfd_alarmproc); + else + hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, @@ -305,8 +371,14 @@ static int do_timerfd_settime(int ufd, int flags, */ for (;;) { spin_lock_irq(&ctx->wqh.lock); - if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) - break; + + if (isalarm(ctx)) { + if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) + break; + } else { + if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) + break; + } spin_unlock_irq(&ctx->wqh.lock); cpu_relax(); } @@ -317,8 +389,12 @@ static int do_timerfd_settime(int ufd, int flags, * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ - if (ctx->expired && ctx->tintv.tv64) - hrtimer_forward_now(&ctx->tmr, ctx->tintv); + if (ctx->expired && ctx->tintv.tv64) { + if (isalarm(ctx)) + alarm_forward_now(&ctx->t.alarm, ctx->tintv); + else + hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); + } old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); old->it_interval = ktime_to_timespec(ctx->tintv); @@ -345,9 +421,18 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t) spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv.tv64) { ctx->expired = 0; - ctx->ticks += - hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; - hrtimer_restart(&ctx->tmr); + + if (isalarm(ctx)) { + ctx->ticks += + alarm_forward_now( + &ctx->t.alarm, ctx->tintv) - 1; + alarm_restart(&ctx->t.alarm); + } else { + ctx->ticks += + hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) + - 1; + hrtimer_restart(&ctx->t.tmr); + } } t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); t->it_interval = ktime_to_timespec(ctx->tintv); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..879b9976c12b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1412,7 +1412,7 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", c->vi.ubi_num, c->vi.vol_id, c->vi.name, - c->ro_mount ? ", R/O mode" : NULL); + c->ro_mount ? ", R/O mode" : ""); x = (long long)c->main_lebs * c->leb_size; y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c74d88baea60..69732d279e8b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -174,8 +174,6 @@ *(.data) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ - CPU_KEEP(init.data) \ - CPU_KEEP(exit.data) \ MEM_KEEP(init.data) \ MEM_KEEP(exit.data) \ *(.data.unlikely) \ @@ -355,8 +353,6 @@ /* __*init sections */ \ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ - CPU_KEEP(init.rodata) \ - CPU_KEEP(exit.rodata) \ MEM_KEEP(init.rodata) \ MEM_KEEP(exit.rodata) \ } \ @@ -397,8 +393,6 @@ *(.text.hot) \ *(.text) \ *(.ref.text) \ - CPU_KEEP(init.text) \ - CPU_KEEP(exit.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ *(.text.unlikely) @@ -482,14 +476,12 @@ /* init and exit section handling */ #define INIT_DATA \ *(.init.data) \ - CPU_DISCARD(init.data) \ MEM_DISCARD(init.data) \ KERNEL_CTORS() \ MCOUNT_REC() \ *(.init.rodata) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ - CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ CLKSRC_OF_TABLES() \ @@ -498,19 +490,15 @@ #define INIT_TEXT \ *(.init.text) \ - CPU_DISCARD(init.text) \ MEM_DISCARD(init.text) #define EXIT_DATA \ *(.exit.data) \ - CPU_DISCARD(exit.data) \ - CPU_DISCARD(exit.rodata) \ MEM_DISCARD(exit.data) \ MEM_DISCARD(exit.rodata) #define EXIT_TEXT \ *(.exit.text) \ - CPU_DISCARD(exit.text) \ MEM_DISCARD(exit.text) #define EXIT_CALL \ diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 9069694e70eb..a899402a5a0e 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -44,10 +44,14 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); int alarm_start(struct alarm *alarm, ktime_t start); +int alarm_start_relative(struct alarm *alarm, ktime_t start); +void alarm_restart(struct alarm *alarm); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval); +ktime_t alarm_expires_remaining(const struct alarm *alarm); /* Provide way to access the rtc device being used by alarmtimers */ struct rtc_device *alarmtimer_get_rtcdev(void); diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 2a5f64a11b77..10fe2a211c2e 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -76,11 +76,11 @@ struct pl08x_channel_data { * platform, all inclusive, including multiplexed channels. The available * physical channels will be multiplexed around these signals as they are * requested, just enumerate all possible channels. - * @get_signal: request a physical signal to be used for a DMA transfer + * @get_xfer_signal: request a physical signal to be used for a DMA transfer * immediately: if there is some multiplexing or similar blocking the use * of the channel the transfer can be denied by returning less than zero, * else it returns the allocated signal number - * @put_signal: indicate to the platform that this physical signal is not + * @put_xfer_signal: indicate to the platform that this physical signal is not * running any DMA transfer and multiplexing can be recycled * @lli_buses: buses which LLIs can be fetched from: PL08X_AHB1 | PL08X_AHB2 * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 @@ -89,8 +89,8 @@ struct pl08x_platform_data { const struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; - int (*get_signal)(const struct pl08x_channel_data *); - void (*put_signal)(const struct pl08x_channel_data *, int); + int (*get_xfer_signal)(const struct pl08x_channel_data *); + void (*put_xfer_signal)(const struct pl08x_channel_data *, int); u8 lli_buses; u8 mem_buses; }; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 963d71431388..0857922e8ad0 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -30,6 +30,7 @@ enum clock_event_nofitiers { #include <linux/notifier.h> struct clock_event_device; +struct module; /* Clock event mode commands */ enum clock_event_mode { @@ -83,6 +84,7 @@ enum clock_event_mode { * @irq: IRQ number (only for non CPU local devices) * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code + * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); @@ -112,6 +114,7 @@ struct clock_event_device { int irq; const struct cpumask *cpumask; struct list_head list; + struct module *owner; } ____cacheline_aligned; /* @@ -138,6 +141,7 @@ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, @@ -150,7 +154,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); -extern int clockevents_register_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7279b94c01da..dbbf8aa7731b 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -21,6 +21,7 @@ /* clocksource cycle base type */ typedef u64 cycle_t; struct clocksource; +struct module; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA #include <asm/clocksource.h> @@ -162,6 +163,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary * @cycle_last: most recent cycle counter value seen by ::read() + * @owner: module reference, must be set by clocksource in modules */ struct clocksource { /* @@ -195,6 +197,7 @@ struct clocksource { cycle_t cs_last; cycle_t wd_last; #endif + struct module *owner; } ____cacheline_aligned; /* @@ -207,6 +210,7 @@ struct clocksource { #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 #define CLOCK_SOURCE_UNSTABLE 0x40 #define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80 +#define CLOCK_SOURCE_RESELECT 0x100 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) @@ -279,7 +283,7 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) extern int clocksource_register(struct clocksource*); -extern void clocksource_unregister(struct clocksource*); +extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); @@ -321,7 +325,7 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) } -extern void timekeeping_notify(struct clocksource *clock); +extern int timekeeping_notify(struct clocksource *clock); extern cycle_t clocksource_mmio_readl_up(struct clocksource *); extern cycle_t clocksource_mmio_readl_down(struct clocksource *); diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index a9c96d865ee7..b3cb71f0d3b0 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -3,6 +3,10 @@ #include <linux/types.h> +#define CRC_T10DIF_DIGEST_SIZE 2 +#define CRC_T10DIF_BLOCK_SIZE 1 + +__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); __u16 crc_t10dif(unsigned char const *, size_t); #endif diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h index 07261d52a6df..1f79b20918b1 100644 --- a/include/linux/dw_apb_timer.h +++ b/include/linux/dw_apb_timer.h @@ -51,6 +51,5 @@ dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs); void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs); cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs); -void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs); #endif /* __DW_APB_TIMER_H__ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 21ae6b3c0359..5f8f176154f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -594,8 +594,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern unsigned long efi_get_time(void); -extern int efi_set_rtc_mmss(unsigned long nowtime); +extern void efi_get_time(struct timespec *now); +extern int efi_set_rtc_mmss(const struct timespec *now); extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; diff --git a/include/linux/init.h b/include/linux/init.h index 861814710d52..e73f2b708525 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -93,13 +93,13 @@ #define __exit __section(.exit.text) __exitused __cold notrace -/* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold notrace -#define __cpuinitdata __section(.cpuinit.data) -#define __cpuinitconst __constsection(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace -#define __cpuexitdata __section(.cpuexit.data) -#define __cpuexitconst __constsection(.cpuexit.rodata) +/* temporary, until all users are removed */ +#define __cpuinit +#define __cpuinitdata +#define __cpuinitconst +#define __cpuexit +#define __cpuexitdata +#define __cpuexitconst /* Used for MEMORY_HOTPLUG */ #define __meminit __section(.meminit.text) __cold notrace @@ -118,9 +118,8 @@ #define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous -#define __CPUINIT .section ".cpuinit.text", "ax" -#define __CPUINITDATA .section ".cpuinit.data", "aw" -#define __CPUINITRODATA .section ".cpuinit.rodata", "a" +/* temporary, until all users are removed */ +#define __CPUINIT #define __MEMINIT .section ".meminit.text", "ax" #define __MEMINITDATA .section ".meminit.data", "aw" diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ba2c708adcff..c983ed18c332 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -73,57 +73,48 @@ struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object * @link: Element in global irq_domain list. - * @revmap_type: Method used for reverse mapping hwirq numbers to linux irq. This - * will be one of the IRQ_DOMAIN_MAP_* values. - * @revmap_data: Revmap method specific data. + * @name: Name of interrupt domain * @ops: pointer to irq_domain methods * @host_data: private data pointer for use by owner. Not touched by irq_domain * core code. - * @irq_base: Start of irq_desc range assigned to the irq_domain. The creator - * of the irq_domain is responsible for allocating the array of - * irq_desc structures. - * @nr_irq: Number of irqs managed by the irq domain - * @hwirq_base: Starting number for hwirqs managed by the irq domain - * @of_node: (optional) Pointer to device tree nodes associated with the - * irq_domain. Used when decoding device tree interrupt specifiers. + * + * Optional elements + * @of_node: Pointer to device tree nodes associated with the irq_domain. Used + * when decoding device tree interrupt specifiers. + * @gc: Pointer to a list of generic chips. There is a helper function for + * setting up one or more generic chips for interrupt controllers + * drivers using the generic chip library which uses this pointer. + * + * Revmap data, used internally by irq_domain + * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that + * support direct mapping + * @revmap_size: Size of the linear map table @linear_revmap[] + * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map + * @linear_revmap: Linear table of hwirq->virq reverse mappings */ struct irq_domain { struct list_head link; - - /* type of reverse mapping_technique */ - unsigned int revmap_type; - union { - struct { - unsigned int size; - unsigned int first_irq; - irq_hw_number_t first_hwirq; - } legacy; - struct { - unsigned int size; - unsigned int *revmap; - } linear; - struct { - unsigned int max_irq; - } nomap; - struct radix_tree_root tree; - } revmap_data; + const char *name; const struct irq_domain_ops *ops; void *host_data; - irq_hw_number_t inval_irq; - /* Optional device node pointer */ + /* Optional data */ struct device_node *of_node; - /* Optional pointer to generic interrupt chips */ struct irq_domain_chip_generic *gc; -}; -#define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs. - * ie. legacy 8259, gets irqs 1..15 */ -#define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ -#define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ -#define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */ + /* reverse map data. The linear map gets appended to the irq_domain */ + irq_hw_number_t hwirq_max; + unsigned int revmap_direct_max_irq; + unsigned int revmap_size; + struct radix_tree_root revmap_tree; + unsigned int linear_revmap[]; +}; #ifdef CONFIG_IRQ_DOMAIN +struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, + irq_hw_number_t hwirq_max, int direct_max, + const struct irq_domain_ops *ops, + void *host_data); struct irq_domain *irq_domain_add_simple(struct device_node *of_node, unsigned int size, unsigned int first_irq, @@ -135,21 +126,30 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -struct irq_domain *irq_domain_add_linear(struct device_node *of_node, +extern struct irq_domain *irq_find_host(struct device_node *node); +extern void irq_set_default_host(struct irq_domain *host); + +/** + * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. + * @of_node: pointer to interrupt controller's device tree node. + * @size: Number of interrupts in the domain. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + */ +static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, unsigned int size, const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + void *host_data) +{ + return __irq_domain_add(of_node, size, size, 0, ops, host_data); +} +static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_add_tree(struct device_node *of_node, - const struct irq_domain_ops *ops, - void *host_data); - -extern struct irq_domain *irq_find_host(struct device_node *node); -extern void irq_set_default_host(struct irq_domain *host); - + void *host_data) +{ + return __irq_domain_add(of_node, 0, max_irq, max_irq, ops, host_data); +} static inline struct irq_domain *irq_domain_add_legacy_isa( struct device_node *of_node, const struct irq_domain_ops *ops, @@ -158,21 +158,40 @@ static inline struct irq_domain *irq_domain_add_legacy_isa( return irq_domain_add_legacy(of_node, NUM_ISA_INTERRUPTS, 0, 0, ops, host_data); } +static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, + const struct irq_domain_ops *ops, + void *host_data) +{ + return __irq_domain_add(of_node, 0, ~0, 0, ops, host_data); +} extern void irq_domain_remove(struct irq_domain *host); -extern int irq_domain_associate_many(struct irq_domain *domain, - unsigned int irq_base, - irq_hw_number_t hwirq_base, int count); -static inline int irq_domain_associate(struct irq_domain *domain, unsigned int irq, - irq_hw_number_t hwirq) -{ - return irq_domain_associate_many(domain, irq, hwirq, 1); -} +extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq); +extern void irq_domain_associate_many(struct irq_domain *domain, + unsigned int irq_base, + irq_hw_number_t hwirq_base, int count); extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); extern void irq_dispose_mapping(unsigned int virq); + +/** + * irq_linear_revmap() - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * This is a fast path alternative to irq_find_mapping() that can be + * called directly by irq controller code to save a handful of + * instructions. It is always safe to call, but won't find irqs mapped + * using the radix tree. + */ +static inline unsigned int irq_linear_revmap(struct irq_domain *domain, + irq_hw_number_t hwirq) +{ + return hwirq < domain->revmap_size ? domain->linear_revmap[hwirq] : 0; +} extern unsigned int irq_find_mapping(struct irq_domain *host, irq_hw_number_t hwirq); extern unsigned int irq_create_direct_mapping(struct irq_domain *host); @@ -186,9 +205,6 @@ static inline int irq_create_identity_mapping(struct irq_domain *host, return irq_create_strict_mappings(host, hwirq, hwirq, 1); } -extern unsigned int irq_linear_revmap(struct irq_domain *host, - irq_hw_number_t hwirq); - extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ @@ -202,14 +218,6 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); -#if defined(CONFIG_OF_IRQ) -extern void irq_domain_generate_simple(const struct of_device_id *match, - u64 phys_base, unsigned int irq_start); -#else /* CONFIG_OF_IRQ */ -static inline void irq_domain_generate_simple(const struct of_device_id *match, - u64 phys_base, unsigned int irq_start) { } -#endif /* !CONFIG_OF_IRQ */ - #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } #endif /* !CONFIG_IRQ_DOMAIN */ diff --git a/include/linux/ktime.h b/include/linux/ktime.h index bbca12804d12..fc66b301b648 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -229,7 +229,8 @@ static inline ktime_t timespec_to_ktime(const struct timespec ts) static inline ktime_t timeval_to_ktime(const struct timeval tv) { return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, - .nsec = (s32)tv.tv_usec * 1000 } }; + .nsec = (s32)(tv.tv_usec * + NSEC_PER_USEC) } }; } /** @@ -320,12 +321,12 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { - return ktime_add_ns(kt, usec * 1000); + return ktime_add_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) { - return ktime_sub_ns(kt, usec * 1000); + return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); @@ -338,7 +339,8 @@ extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); * * Returns true if there was a successful conversion, false if kt was 0. */ -static inline bool ktime_to_timespec_cond(const ktime_t kt, struct timespec *ts) +static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt, + struct timespec *ts) { if (kt.tv64) { *ts = ktime_to_timespec(kt); diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 364dda734877..ae36298ba076 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -21,7 +21,6 @@ struct device_node; struct of_dma { struct list_head of_dma_controllers; struct device_node *of_node; - int of_dma_nbcells; struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *); void *of_dma_data; diff --git a/include/linux/platform_data/dma-atmel.h b/include/linux/platform_data/dma-atmel.h index cab0997be3de..e95f19c65873 100644 --- a/include/linux/platform_data/dma-atmel.h +++ b/include/linux/platform_data/dma-atmel.h @@ -35,16 +35,20 @@ struct at_dma_slave { /* Platform-configurable bits in CFG */ +#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ + #define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ #define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ #define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ #define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ #define ATC_SRC_H2SEL_SW (0x0 << 9) #define ATC_SRC_H2SEL_HW (0x1 << 9) +#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ #define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ #define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ #define ATC_DST_H2SEL_SW (0x0 << 13) #define ATC_DST_H2SEL_HW (0x1 << 13) +#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ #define ATC_SOD (0x1 << 16) /* Stop On Done */ #define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ #define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h index f6d30cc1cb77..beac6b8b6a7b 100644 --- a/include/linux/platform_data/dma-imx.h +++ b/include/linux/platform_data/dma-imx.h @@ -60,10 +60,8 @@ static inline int imx_dma_is_ipu(struct dma_chan *chan) static inline int imx_dma_is_general_purpose(struct dma_chan *chan) { - return strstr(dev_name(chan->device->dev), "sdma") || - !strcmp(dev_name(chan->device->dev), "imx1-dma") || - !strcmp(dev_name(chan->device->dev), "imx21-dma") || - !strcmp(dev_name(chan->device->dev), "imx27-dma"); + return !strcmp(chan->device->dev->driver->name, "imx-sdma") || + !strcmp(chan->device->dev->driver->name, "imx-dma"); } #endif diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 7794d75ed155..907f3fd191ac 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -7,14 +7,20 @@ #include <linux/timex.h> #include <linux/alarmtimer.h> -union cpu_time_count { - cputime_t cpu; - unsigned long long sched; -}; + +static inline unsigned long long cputime_to_expires(cputime_t expires) +{ + return (__force unsigned long long)expires; +} + +static inline cputime_t expires_to_cputime(unsigned long long expires) +{ + return (__force cputime_t)expires; +} struct cpu_timer_list { struct list_head entry; - union cpu_time_count expires, incr; + unsigned long long expires, incr; struct task_struct *task; int firing; }; diff --git a/include/linux/pvclock_gtod.h b/include/linux/pvclock_gtod.h index 0ca75825b60d..a71d2dbd3610 100644 --- a/include/linux/pvclock_gtod.h +++ b/include/linux/pvclock_gtod.h @@ -3,6 +3,13 @@ #include <linux/notifier.h> +/* + * The pvclock gtod notifier is called when the system time is updated + * and is used to keep guest time synchronized with host time. + * + * The 'action' parameter in the notifier function is false (0), or + * true (non-zero) if system time was stepped. + */ extern int pvclock_gtod_register_notifier(struct notifier_block *nb); extern int pvclock_gtod_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h new file mode 100644 index 000000000000..fa7922c80a41 --- /dev/null +++ b/include/linux/sched_clock.h @@ -0,0 +1,21 @@ +/* + * sched_clock.h: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef LINUX_SCHED_CLOCK +#define LINUX_SCHED_CLOCK + +#ifdef CONFIG_GENERIC_SCHED_CLOCK +extern void sched_clock_postinit(void); +#else +static inline void sched_clock_postinit(void) { } +#endif + +extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); + +extern unsigned long long (*sched_clock_func)(void); + +#endif diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h index b64d6bec6f90..4e83f3e034f3 100644 --- a/include/linux/sh_dma.h +++ b/include/linux/sh_dma.h @@ -99,6 +99,4 @@ struct sh_dmae_pdata { #define CHCR_TE 0x00000002 #define CHCR_IE 0x00000004 -bool shdma_chan_filter(struct dma_chan *chan, void *arg); - #endif diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index a3728bf66f0e..382cf710ca9a 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -68,6 +68,8 @@ struct shdma_chan { int id; /* Raw id of this channel */ int irq; /* Channel IRQ */ int slave_id; /* Client ID for slave DMA */ + int hw_req; /* DMA request line for slave DMA - same + * as MID/RID, used with DT */ enum shdma_pm_state pm_state; }; @@ -122,5 +124,6 @@ void shdma_chan_remove(struct shdma_chan *schan); int shdma_init(struct device *dev, struct shdma_dev *sdev, int chan_num); void shdma_cleanup(struct shdma_dev *sdev); +bool shdma_chan_filter(struct dma_chan *chan, void *arg); #endif diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h index 53cae1e11e57..723c324590c1 100644 --- a/include/uapi/mtd/ubi-user.h +++ b/include/uapi/mtd/ubi-user.h @@ -173,7 +173,10 @@ #define UBI_VOL_IOC_MAGIC 'O' -/* Start UBI volume update */ +/* Start UBI volume update + * Note: This actually takes a pointer (__s64*), but we can't change + * that without breaking the ABI on 32bit systems + */ #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, __s64) /* LEB erasure command, used for debugging, disabled by default */ #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, __s32) diff --git a/init/Kconfig b/init/Kconfig index ef10d83bc379..ea1be003275a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -780,6 +780,9 @@ config LOG_BUF_SHIFT config HAVE_UNSTABLE_SCHED_CLOCK bool +config GENERIC_SCHED_CLOCK + bool + # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: diff --git a/init/main.c b/init/main.c index f2366533c922..d03d2ec2eacf 100644 --- a/init/main.c +++ b/init/main.c @@ -74,6 +74,7 @@ #include <linux/ptrace.h> #include <linux/blkdev.h> #include <linux/elevator.h> +#include <linux/sched_clock.h> #include <asm/io.h> #include <asm/bugs.h> @@ -554,6 +555,7 @@ asmlinkage void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); + sched_clock_postinit(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 3ee4d06c6fc2..f0f4fe29cd21 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -722,17 +722,20 @@ static int hrtimer_switch_to_hres(void) return 1; } +static void clock_was_set_work(struct work_struct *work) +{ + clock_was_set(); +} + +static DECLARE_WORK(hrtimer_work, clock_was_set_work); + /* - * Called from timekeeping code to reprogramm the hrtimer interrupt - * device. If called from the timer interrupt context we defer it to - * softirq context. + * Called from timekeeping and resume code to reprogramm the hrtimer + * interrupt device on all cpus. */ void clock_was_set_delayed(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - cpu_base->clock_was_set = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + schedule_work(&hrtimer_work); } #else @@ -774,15 +777,19 @@ void clock_was_set(void) /* * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): + * interrupt on all online CPUs. However, all other CPUs will be + * stopped with IRQs interrupts disabled so the clock_was_set() call + * must be deferred. */ void hrtimers_resume(void) { WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + /* Retrigger on the local CPU */ retrigger_next_event(NULL); - timerfd_clock_was_set(); + /* And schedule a retrigger for all others */ + clock_was_set_delayed(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1433,13 +1440,6 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - if (cpu_base->clock_was_set) { - cpu_base->clock_was_set = 0; - clock_was_set(); - } - hrtimer_peek_ahead_timers(); } diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index e3544c19bdd2..10e663ab1f4a 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -275,10 +275,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, if (d->gc) return -EBUSY; - if (d->revmap_type != IRQ_DOMAIN_MAP_LINEAR) - return -EINVAL; - - numchips = d->revmap_data.linear.size / irqs_per_chip; + numchips = d->revmap_size / irqs_per_chip; if (!numchips) return -EINVAL; @@ -310,6 +307,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, /* Calc pointer to the next generic chip */ tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); } + d->name = name; return 0; } EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1ed8dff17eb9..2d7cd3428365 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -23,9 +23,11 @@ static DEFINE_MUTEX(revmap_trees_mutex); static struct irq_domain *irq_default_domain; /** - * irq_domain_alloc() - Allocate a new irq_domain data structure + * __irq_domain_add() - Allocate a new irq_domain data structure * @of_node: optional device-tree node of the interrupt controller - * @revmap_type: type of reverse mapping to use + * @size: Size of linear map; 0 for radix mapping only + * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no + * direct mapping * @ops: map/unmap domain callbacks * @host_data: Controller private data pointer * @@ -33,41 +35,35 @@ static struct irq_domain *irq_default_domain; * register allocated irq_domain with irq_domain_register(). Returns pointer * to IRQ domain, or NULL on failure. */ -static struct irq_domain *irq_domain_alloc(struct device_node *of_node, - unsigned int revmap_type, - const struct irq_domain_ops *ops, - void *host_data) +struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, + irq_hw_number_t hwirq_max, int direct_max, + const struct irq_domain_ops *ops, + void *host_data) { struct irq_domain *domain; - domain = kzalloc_node(sizeof(*domain), GFP_KERNEL, - of_node_to_nid(of_node)); + domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), + GFP_KERNEL, of_node_to_nid(of_node)); if (WARN_ON(!domain)) return NULL; /* Fill structure */ - domain->revmap_type = revmap_type; + INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); domain->ops = ops; domain->host_data = host_data; domain->of_node = of_node_get(of_node); + domain->hwirq_max = hwirq_max; + domain->revmap_size = size; + domain->revmap_direct_max_irq = direct_max; - return domain; -} - -static void irq_domain_free(struct irq_domain *domain) -{ - of_node_put(domain->of_node); - kfree(domain); -} - -static void irq_domain_add(struct irq_domain *domain) -{ mutex_lock(&irq_domain_mutex); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); - pr_debug("Allocated domain of type %d @0x%p\n", - domain->revmap_type, domain); + + pr_debug("Added domain %s\n", domain->name); + return domain; } +EXPORT_SYMBOL_GPL(__irq_domain_add); /** * irq_domain_remove() - Remove an irq domain. @@ -81,29 +77,12 @@ void irq_domain_remove(struct irq_domain *domain) { mutex_lock(&irq_domain_mutex); - switch (domain->revmap_type) { - case IRQ_DOMAIN_MAP_LEGACY: - /* - * Legacy domains don't manage their own irq_desc - * allocations, we expect the caller to handle irq_desc - * freeing on their own. - */ - break; - case IRQ_DOMAIN_MAP_TREE: - /* - * radix_tree_delete() takes care of destroying the root - * node when all entries are removed. Shout if there are - * any mappings left. - */ - WARN_ON(domain->revmap_data.tree.height); - break; - case IRQ_DOMAIN_MAP_LINEAR: - kfree(domain->revmap_data.linear.revmap); - domain->revmap_data.linear.size = 0; - break; - case IRQ_DOMAIN_MAP_NOMAP: - break; - } + /* + * radix_tree_delete() takes care of destroying the root + * node when all entries are removed. Shout if there are + * any mappings left. + */ + WARN_ON(domain->revmap_tree.height); list_del(&domain->link); @@ -115,44 +94,30 @@ void irq_domain_remove(struct irq_domain *domain) mutex_unlock(&irq_domain_mutex); - pr_debug("Removed domain of type %d @0x%p\n", - domain->revmap_type, domain); + pr_debug("Removed domain %s\n", domain->name); - irq_domain_free(domain); + of_node_put(domain->of_node); + kfree(domain); } EXPORT_SYMBOL_GPL(irq_domain_remove); -static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, - irq_hw_number_t hwirq) -{ - irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; - int size = domain->revmap_data.legacy.size; - - if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) - return 0; - return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; -} - /** - * irq_domain_add_simple() - Allocate and register a simple irq_domain. + * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs * @of_node: pointer to interrupt controller's device tree node. * @size: total number of irqs in mapping * @first_irq: first number of irq block assigned to the domain, - * pass zero to assign irqs on-the-fly. This will result in a - * linear IRQ domain so it is important to use irq_create_mapping() - * for each used IRQ, especially when SPARSE_IRQ is enabled. + * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then + * pre-map all of the irqs in the domain to virqs starting at first_irq. * @ops: map/unmap domain callbacks * @host_data: Controller private data pointer * - * Allocates a legacy irq_domain if irq_base is positive or a linear - * domain otherwise. For the legacy domain, IRQ descriptors will also - * be allocated. + * Allocates an irq_domain, and optionally if first_irq is positive then also + * allocate irq_descs and map all of the hwirqs to virqs starting at first_irq. * * This is intended to implement the expected behaviour for most - * interrupt controllers which is that a linear mapping should - * normally be used unless the system requires a legacy mapping in - * order to support supplying interrupt numbers during non-DT - * registration of devices. + * interrupt controllers. If device tree is used, then first_irq will be 0 and + * irqs get mapped dynamically on the fly. However, if the controller requires + * static virq assignments (non-DT boot) then it will set that up correctly. */ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, unsigned int size, @@ -160,33 +125,25 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, const struct irq_domain_ops *ops, void *host_data) { - if (first_irq > 0) { - int irq_base; + struct irq_domain *domain; + + domain = __irq_domain_add(of_node, size, size, 0, ops, host_data); + if (!domain) + return NULL; + if (first_irq > 0) { if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { - /* - * Set the descriptor allocator to search for a - * 1-to-1 mapping, such as irq_alloc_desc_at(). - * Use of_node_to_nid() which is defined to - * numa_node_id() on platforms that have no custom - * implementation. - */ - irq_base = irq_alloc_descs(first_irq, first_irq, size, - of_node_to_nid(of_node)); - if (irq_base < 0) { + /* attempt to allocated irq_descs */ + int rc = irq_alloc_descs(first_irq, first_irq, size, + of_node_to_nid(of_node)); + if (rc < 0) pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", first_irq); - irq_base = first_irq; - } - } else - irq_base = first_irq; - - return irq_domain_add_legacy(of_node, size, irq_base, 0, - ops, host_data); + } + irq_domain_associate_many(domain, first_irq, 0, size); } - /* A linear domain is the default */ - return irq_domain_add_linear(of_node, size, ops, host_data); + return domain; } EXPORT_SYMBOL_GPL(irq_domain_add_simple); @@ -213,131 +170,19 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, void *host_data) { struct irq_domain *domain; - unsigned int i; - domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); + domain = __irq_domain_add(of_node, first_hwirq + size, + first_hwirq + size, 0, ops, host_data); if (!domain) return NULL; - domain->revmap_data.legacy.first_irq = first_irq; - domain->revmap_data.legacy.first_hwirq = first_hwirq; - domain->revmap_data.legacy.size = size; + irq_domain_associate_many(domain, first_irq, first_hwirq, size); - mutex_lock(&irq_domain_mutex); - /* Verify that all the irqs are available */ - for (i = 0; i < size; i++) { - int irq = first_irq + i; - struct irq_data *irq_data = irq_get_irq_data(irq); - - if (WARN_ON(!irq_data || irq_data->domain)) { - mutex_unlock(&irq_domain_mutex); - irq_domain_free(domain); - return NULL; - } - } - - /* Claim all of the irqs before registering a legacy domain */ - for (i = 0; i < size; i++) { - struct irq_data *irq_data = irq_get_irq_data(first_irq + i); - irq_data->hwirq = first_hwirq + i; - irq_data->domain = domain; - } - mutex_unlock(&irq_domain_mutex); - - for (i = 0; i < size; i++) { - int irq = first_irq + i; - int hwirq = first_hwirq + i; - - /* IRQ0 gets ignored */ - if (!irq) - continue; - - /* Legacy flags are left to default at this point, - * one can then use irq_create_mapping() to - * explicitly change them - */ - if (ops->map) - ops->map(domain, irq, hwirq); - - /* Clear norequest flags */ - irq_clear_status_flags(irq, IRQ_NOREQUEST); - } - - irq_domain_add(domain); return domain; } EXPORT_SYMBOL_GPL(irq_domain_add_legacy); /** - * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. - * @of_node: pointer to interrupt controller's device tree node. - * @size: Number of interrupts in the domain. - * @ops: map/unmap domain callbacks - * @host_data: Controller private data pointer - */ -struct irq_domain *irq_domain_add_linear(struct device_node *of_node, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain *domain; - unsigned int *revmap; - - revmap = kzalloc_node(sizeof(*revmap) * size, GFP_KERNEL, - of_node_to_nid(of_node)); - if (WARN_ON(!revmap)) - return NULL; - - domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data); - if (!domain) { - kfree(revmap); - return NULL; - } - domain->revmap_data.linear.size = size; - domain->revmap_data.linear.revmap = revmap; - irq_domain_add(domain); - return domain; -} -EXPORT_SYMBOL_GPL(irq_domain_add_linear); - -struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, - unsigned int max_irq, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain *domain = irq_domain_alloc(of_node, - IRQ_DOMAIN_MAP_NOMAP, ops, host_data); - if (domain) { - domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; - irq_domain_add(domain); - } - return domain; -} -EXPORT_SYMBOL_GPL(irq_domain_add_nomap); - -/** - * irq_domain_add_tree() - * @of_node: pointer to interrupt controller's device tree node. - * @ops: map/unmap domain callbacks - * - * Note: The radix tree will be allocated later during boot automatically - * (the reverse mapping will use the slow path until that happens). - */ -struct irq_domain *irq_domain_add_tree(struct device_node *of_node, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain *domain = irq_domain_alloc(of_node, - IRQ_DOMAIN_MAP_TREE, ops, host_data); - if (domain) { - INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL); - irq_domain_add(domain); - } - return domain; -} -EXPORT_SYMBOL_GPL(irq_domain_add_tree); - -/** * irq_find_host() - Locates a domain for a given device node * @node: device-tree node of the interrupt controller */ @@ -385,125 +230,108 @@ void irq_set_default_host(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_set_default_host); -static void irq_domain_disassociate_many(struct irq_domain *domain, - unsigned int irq_base, int count) +static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) { - /* - * disassociate in reverse order; - * not strictly necessary, but nice for unwinding - */ - while (count--) { - int irq = irq_base + count; - struct irq_data *irq_data = irq_get_irq_data(irq); - irq_hw_number_t hwirq; + struct irq_data *irq_data = irq_get_irq_data(irq); + irq_hw_number_t hwirq; - if (WARN_ON(!irq_data || irq_data->domain != domain)) - continue; + if (WARN(!irq_data || irq_data->domain != domain, + "virq%i doesn't exist; cannot disassociate\n", irq)) + return; - hwirq = irq_data->hwirq; - irq_set_status_flags(irq, IRQ_NOREQUEST); + hwirq = irq_data->hwirq; + irq_set_status_flags(irq, IRQ_NOREQUEST); - /* remove chip and handler */ - irq_set_chip_and_handler(irq, NULL, NULL); + /* remove chip and handler */ + irq_set_chip_and_handler(irq, NULL, NULL); - /* Make sure it's completed */ - synchronize_irq(irq); + /* Make sure it's completed */ + synchronize_irq(irq); - /* Tell the PIC about it */ - if (domain->ops->unmap) - domain->ops->unmap(domain, irq); - smp_mb(); + /* Tell the PIC about it */ + if (domain->ops->unmap) + domain->ops->unmap(domain, irq); + smp_mb(); - irq_data->domain = NULL; - irq_data->hwirq = 0; + irq_data->domain = NULL; + irq_data->hwirq = 0; - /* Clear reverse map */ - switch(domain->revmap_type) { - case IRQ_DOMAIN_MAP_LINEAR: - if (hwirq < domain->revmap_data.linear.size) - domain->revmap_data.linear.revmap[hwirq] = 0; - break; - case IRQ_DOMAIN_MAP_TREE: - mutex_lock(&revmap_trees_mutex); - radix_tree_delete(&domain->revmap_data.tree, hwirq); - mutex_unlock(&revmap_trees_mutex); - break; - } + /* Clear reverse map for this hwirq */ + if (hwirq < domain->revmap_size) { + domain->linear_revmap[hwirq] = 0; + } else { + mutex_lock(&revmap_trees_mutex); + radix_tree_delete(&domain->revmap_tree, hwirq); + mutex_unlock(&revmap_trees_mutex); } } -int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, - irq_hw_number_t hwirq_base, int count) +int irq_domain_associate(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) { - unsigned int virq = irq_base; - irq_hw_number_t hwirq = hwirq_base; - int i, ret; + struct irq_data *irq_data = irq_get_irq_data(virq); + int ret; - pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, - of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); + if (WARN(hwirq >= domain->hwirq_max, + "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name)) + return -EINVAL; + if (WARN(!irq_data, "error: virq%i is not allocated", virq)) + return -EINVAL; + if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) + return -EINVAL; - for (i = 0; i < count; i++) { - struct irq_data *irq_data = irq_get_irq_data(virq + i); - - if (WARN(!irq_data, "error: irq_desc not allocated; " - "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) - return -EINVAL; - if (WARN(irq_data->domain, "error: irq_desc already associated; " - "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) - return -EINVAL; - }; - - for (i = 0; i < count; i++, virq++, hwirq++) { - struct irq_data *irq_data = irq_get_irq_data(virq); - - irq_data->hwirq = hwirq; - irq_data->domain = domain; - if (domain->ops->map) { - ret = domain->ops->map(domain, virq, hwirq); - if (ret != 0) { - /* - * If map() returns -EPERM, this interrupt is protected - * by the firmware or some other service and shall not - * be mapped. - * - * Since on some platforms we blindly try to map everything - * we end up with a log full of backtraces. - * - * So instead, we silently fail on -EPERM, it is the - * responsibility of the PIC driver to display a relevant - * message if needed. - */ - if (ret != -EPERM) { - pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n", - virq, hwirq, ret); - WARN_ON(1); - } - irq_data->domain = NULL; - irq_data->hwirq = 0; - goto err_unmap; + mutex_lock(&irq_domain_mutex); + irq_data->hwirq = hwirq; + irq_data->domain = domain; + if (domain->ops->map) { + ret = domain->ops->map(domain, virq, hwirq); + if (ret != 0) { + /* + * If map() returns -EPERM, this interrupt is protected + * by the firmware or some other service and shall not + * be mapped. Don't bother telling the user about it. + */ + if (ret != -EPERM) { + pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", + domain->name, hwirq, virq, ret); } + irq_data->domain = NULL; + irq_data->hwirq = 0; + mutex_unlock(&irq_domain_mutex); + return ret; } - switch (domain->revmap_type) { - case IRQ_DOMAIN_MAP_LINEAR: - if (hwirq < domain->revmap_data.linear.size) - domain->revmap_data.linear.revmap[hwirq] = virq; - break; - case IRQ_DOMAIN_MAP_TREE: - mutex_lock(&revmap_trees_mutex); - radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); - mutex_unlock(&revmap_trees_mutex); - break; - } + /* If not already assigned, give the domain the chip's name */ + if (!domain->name && irq_data->chip) + domain->name = irq_data->chip->name; + } - irq_clear_status_flags(virq, IRQ_NOREQUEST); + if (hwirq < domain->revmap_size) { + domain->linear_revmap[hwirq] = virq; + } else { + mutex_lock(&revmap_trees_mutex); + radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); + mutex_unlock(&revmap_trees_mutex); } + mutex_unlock(&irq_domain_mutex); + + irq_clear_status_flags(virq, IRQ_NOREQUEST); return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_associate); - err_unmap: - irq_domain_disassociate_many(domain, irq_base, i); - return -EINVAL; +void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, + irq_hw_number_t hwirq_base, int count) +{ + int i; + + pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, + of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); + + for (i = 0; i < count; i++) { + irq_domain_associate(domain, irq_base + i, hwirq_base + i); + } } EXPORT_SYMBOL_GPL(irq_domain_associate_many); @@ -513,7 +341,9 @@ EXPORT_SYMBOL_GPL(irq_domain_associate_many); * * This routine is used for irq controllers which can choose the hardware * interrupt numbers they generate. In such a case it's simplest to use - * the linux irq as the hardware interrupt number. + * the linux irq as the hardware interrupt number. It still uses the linear + * or radix tree to store the mapping, but the irq controller can optimize + * the revmap path by using the hwirq directly. */ unsigned int irq_create_direct_mapping(struct irq_domain *domain) { @@ -522,17 +352,14 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) if (domain == NULL) domain = irq_default_domain; - if (WARN_ON(!domain || domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP)) - return 0; - virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); if (!virq) { pr_debug("create_direct virq allocation failed\n"); return 0; } - if (virq >= domain->revmap_data.nomap.max_irq) { + if (virq >= domain->revmap_direct_max_irq) { pr_err("ERROR: no free irqs available below %i maximum\n", - domain->revmap_data.nomap.max_irq); + domain->revmap_direct_max_irq); irq_free_desc(virq); return 0; } @@ -569,9 +396,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, if (domain == NULL) domain = irq_default_domain; if (domain == NULL) { - pr_warning("irq_create_mapping called for" - " NULL domain, hwirq=%lx\n", hwirq); - WARN_ON(1); + WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); return 0; } pr_debug("-> using domain @%p\n", domain); @@ -583,10 +408,6 @@ unsigned int irq_create_mapping(struct irq_domain *domain, return virq; } - /* Get a virtual interrupt number */ - if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) - return irq_domain_legacy_revmap(domain, hwirq); - /* Allocate a virtual interrupt number */ hint = hwirq % nr_irqs; if (hint == 0) @@ -639,12 +460,7 @@ int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, if (unlikely(ret < 0)) return ret; - ret = irq_domain_associate_many(domain, irq_base, hwirq_base, count); - if (unlikely(ret < 0)) { - irq_free_descs(irq_base, count); - return ret; - } - + irq_domain_associate_many(domain, irq_base, hwirq_base, count); return 0; } EXPORT_SYMBOL_GPL(irq_create_strict_mappings); @@ -671,8 +487,8 @@ unsigned int irq_create_of_mapping(struct device_node *controller, if (intsize > 0) return intspec[0]; #endif - pr_warning("no irq domain found for %s !\n", - of_node_full_name(controller)); + pr_warn("no irq domain found for %s !\n", + of_node_full_name(controller)); return 0; } @@ -714,11 +530,7 @@ void irq_dispose_mapping(unsigned int virq) if (WARN_ON(domain == NULL)) return; - /* Never unmap legacy interrupts */ - if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) - return; - - irq_domain_disassociate_many(domain, virq, 1); + irq_domain_disassociate(domain, virq); irq_free_desc(virq); } EXPORT_SYMBOL_GPL(irq_dispose_mapping); @@ -739,63 +551,51 @@ unsigned int irq_find_mapping(struct irq_domain *domain, if (domain == NULL) return 0; - switch (domain->revmap_type) { - case IRQ_DOMAIN_MAP_LEGACY: - return irq_domain_legacy_revmap(domain, hwirq); - case IRQ_DOMAIN_MAP_LINEAR: - return irq_linear_revmap(domain, hwirq); - case IRQ_DOMAIN_MAP_TREE: - rcu_read_lock(); - data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); - rcu_read_unlock(); - if (data) - return data->irq; - break; - case IRQ_DOMAIN_MAP_NOMAP: + if (hwirq < domain->revmap_direct_max_irq) { data = irq_get_irq_data(hwirq); if (data && (data->domain == domain) && (data->hwirq == hwirq)) return hwirq; - break; } - return 0; -} -EXPORT_SYMBOL_GPL(irq_find_mapping); + /* Check if the hwirq is in the linear revmap. */ + if (hwirq < domain->revmap_size) + return domain->linear_revmap[hwirq]; -/** - * irq_linear_revmap() - Find a linux irq from a hw irq number. - * @domain: domain owning this hardware interrupt - * @hwirq: hardware irq number in that domain space - * - * This is a fast path that can be called directly by irq controller code to - * save a handful of instructions. - */ -unsigned int irq_linear_revmap(struct irq_domain *domain, - irq_hw_number_t hwirq) -{ - BUG_ON(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR); - - /* Check revmap bounds; complain if exceeded */ - if (WARN_ON(hwirq >= domain->revmap_data.linear.size)) - return 0; - - return domain->revmap_data.linear.revmap[hwirq]; + rcu_read_lock(); + data = radix_tree_lookup(&domain->revmap_tree, hwirq); + rcu_read_unlock(); + return data ? data->irq : 0; } -EXPORT_SYMBOL_GPL(irq_linear_revmap); +EXPORT_SYMBOL_GPL(irq_find_mapping); #ifdef CONFIG_IRQ_DOMAIN_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { unsigned long flags; struct irq_desc *desc; - const char *p; - static const char none[] = "none"; - void *data; + struct irq_domain *domain; + struct radix_tree_iter iter; + void *data, **slot; int i; - seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", + seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", + "name", "mapped", "linear-max", "direct-max", "devtree-node"); + mutex_lock(&irq_domain_mutex); + list_for_each_entry(domain, &irq_domain_list, link) { + int count = 0; + radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) + count++; + seq_printf(m, "%c%-16s %6u %10u %10u %s\n", + domain == irq_default_domain ? '*' : ' ', domain->name, + domain->revmap_size + count, domain->revmap_size, + domain->revmap_direct_max_irq, + domain->of_node ? of_node_full_name(domain->of_node) : ""); + } + mutex_unlock(&irq_domain_mutex); + + seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq", "chip name", (int)(2 * sizeof(void *) + 2), "chip data", - "domain name"); + "active", "type", "domain"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -803,28 +603,28 @@ static int virq_debug_show(struct seq_file *m, void *private) continue; raw_spin_lock_irqsave(&desc->lock, flags); + domain = desc->irq_data.domain; - if (desc->action && desc->action->handler) { + if (domain) { struct irq_chip *chip; + int hwirq = desc->irq_data.hwirq; + bool direct; seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); + seq_printf(m, "0x%05x ", hwirq); chip = irq_desc_get_chip(desc); - if (chip && chip->name) - p = chip->name; - else - p = none; - seq_printf(m, "%-15s ", p); + seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); data = irq_desc_get_chip_data(desc); seq_printf(m, data ? "0x%p " : " %p ", data); - if (desc->irq_data.domain) - p = of_node_full_name(desc->irq_data.domain->of_node); - else - p = none; - seq_printf(m, "%s\n", p); + seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); + direct = (i == hwirq) && (i < domain->revmap_direct_max_irq); + seq_printf(m, "%6s%-8s ", + (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", + direct ? "(DIRECT)" : ""); + seq_printf(m, "%s\n", desc->irq_data.domain->name); } raw_spin_unlock_irqrestore(&desc->lock, flags); @@ -921,18 +721,3 @@ const struct irq_domain_ops irq_domain_simple_ops = { .xlate = irq_domain_xlate_onetwocell, }; EXPORT_SYMBOL_GPL(irq_domain_simple_ops); - -#ifdef CONFIG_OF_IRQ -void irq_domain_generate_simple(const struct of_device_id *match, - u64 phys_base, unsigned int irq_start) -{ - struct device_node *node; - pr_debug("looking for phys_base=%llx, irq_start=%i\n", - (unsigned long long) phys_base, (int) irq_start); - node = of_find_matching_node_by_address(NULL, match, phys_base); - if (node) - irq_domain_add_legacy(node, 32, irq_start, 0, - &irq_domain_simple_ops, NULL); -} -EXPORT_SYMBOL_GPL(irq_domain_generate_simple); -#endif diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 19ed5c425c3b..36f6ee181b0c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -462,6 +462,8 @@ int show_interrupts(struct seq_file *p, void *v) } else { seq_printf(p, " %8s", "None"); } + if (desc->irq_data.domain) + seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq); #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); #endif diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 42670e9b44e0..c7f31aa272f7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock) return error; } -static inline union cpu_time_count +static inline unsigned long long timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) { - union cpu_time_count ret; - ret.sched = 0; /* high half always zero when .cpu used */ + unsigned long long ret; + + ret = 0; /* high half always zero when .cpu used */ if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; + ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; } else { - ret.cpu = timespec_to_cputime(tp); + ret = cputime_to_expires(timespec_to_cputime(tp)); } return ret; } static void sample_to_timespec(const clockid_t which_clock, - union cpu_time_count cpu, + unsigned long long expires, struct timespec *tp) { if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) - *tp = ns_to_timespec(cpu.sched); + *tp = ns_to_timespec(expires); else - cputime_to_timespec(cpu.cpu, tp); -} - -static inline int cpu_time_before(const clockid_t which_clock, - union cpu_time_count now, - union cpu_time_count then) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - return now.sched < then.sched; - } else { - return now.cpu < then.cpu; - } -} -static inline void cpu_time_add(const clockid_t which_clock, - union cpu_time_count *acc, - union cpu_time_count val) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - acc->sched += val.sched; - } else { - acc->cpu += val.cpu; - } -} -static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, - union cpu_time_count a, - union cpu_time_count b) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - a.sched -= b.sched; - } else { - a.cpu -= b.cpu; - } - return a; + cputime_to_timespec((__force cputime_t)expires, tp); } /* @@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, * given the current clock sample. */ static void bump_cpu_timer(struct k_itimer *timer, - union cpu_time_count now) + unsigned long long now) { int i; + unsigned long long delta, incr; - if (timer->it.cpu.incr.sched == 0) + if (timer->it.cpu.incr == 0) return; - if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { - unsigned long long delta, incr; + if (now < timer->it.cpu.expires) + return; - if (now.sched < timer->it.cpu.expires.sched) - return; - incr = timer->it.cpu.incr.sched; - delta = now.sched + incr - timer->it.cpu.expires.sched; - /* Don't use (incr*2 < delta), incr*2 might overflow. */ - for (i = 0; incr < delta - incr; i++) - incr = incr << 1; - for (; i >= 0; incr >>= 1, i--) { - if (delta < incr) - continue; - timer->it.cpu.expires.sched += incr; - timer->it_overrun += 1 << i; - delta -= incr; - } - } else { - cputime_t delta, incr; + incr = timer->it.cpu.incr; + delta = now + incr - timer->it.cpu.expires; - if (now.cpu < timer->it.cpu.expires.cpu) - return; - incr = timer->it.cpu.incr.cpu; - delta = now.cpu + incr - timer->it.cpu.expires.cpu; - /* Don't use (incr*2 < delta), incr*2 might overflow. */ - for (i = 0; incr < delta - incr; i++) - incr += incr; - for (; i >= 0; incr = incr >> 1, i--) { - if (delta < incr) - continue; - timer->it.cpu.expires.cpu += incr; - timer->it_overrun += 1 << i; - delta -= incr; - } + /* Don't use (incr*2 < delta), incr*2 might overflow. */ + for (i = 0; incr < delta - incr; i++) + incr = incr << 1; + + for (; i >= 0; incr >>= 1, i--) { + if (delta < incr) + continue; + + timer->it.cpu.expires += incr; + timer->it_overrun += 1 << i; + delta -= incr; } } @@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) return 0; } -static inline cputime_t prof_ticks(struct task_struct *p) +static inline unsigned long long prof_ticks(struct task_struct *p) { cputime_t utime, stime; task_cputime(p, &utime, &stime); - return utime + stime; + return cputime_to_expires(utime + stime); } -static inline cputime_t virt_ticks(struct task_struct *p) +static inline unsigned long long virt_ticks(struct task_struct *p) { cputime_t utime; task_cputime(p, &utime, NULL); - return utime; + return cputime_to_expires(utime); } static int @@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) * Sample a per-thread clock for the given task. */ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = prof_ticks(p); + *sample = prof_ticks(p); break; case CPUCLOCK_VIRT: - cpu->cpu = virt_ticks(p); + *sample = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = task_sched_runtime(p); + *sample = task_sched_runtime(p); break; } return 0; @@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) */ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { struct task_cputime cputime; @@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock, return -EINVAL; case CPUCLOCK_PROF: thread_group_cputime(p, &cputime); - cpu->cpu = cputime.utime + cputime.stime; + *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: thread_group_cputime(p, &cputime); - cpu->cpu = cputime.utime; + *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: thread_group_cputime(p, &cputime); - cpu->sched = cputime.sum_exec_runtime; + *sample = cputime.sum_exec_runtime; break; } return 0; @@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; - union cpu_time_count rtn; + unsigned long long rtn; if (pid == 0) { /* @@ -446,6 +399,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) return ret; } +static void cleanup_timers_list(struct list_head *head, + unsigned long long curr) +{ + struct cpu_timer_list *timer, *next; + + list_for_each_entry_safe(timer, next, head, entry) + list_del_init(&timer->entry); +} + /* * Clean out CPU timers still ticking when a thread exited. The task * pointer is cleared, and the expiry time is replaced with the residual @@ -456,37 +418,12 @@ static void cleanup_timers(struct list_head *head, cputime_t utime, cputime_t stime, unsigned long long sum_exec_runtime) { - struct cpu_timer_list *timer, *next; - cputime_t ptime = utime + stime; - - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.cpu < ptime) { - timer->expires.cpu = 0; - } else { - timer->expires.cpu -= ptime; - } - } - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.cpu < utime) { - timer->expires.cpu = 0; - } else { - timer->expires.cpu -= utime; - } - } + cputime_t ptime = utime + stime; - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.sched < sum_exec_runtime) { - timer->expires.sched = 0; - } else { - timer->expires.sched -= sum_exec_runtime; - } - } + cleanup_timers_list(head, cputime_to_expires(ptime)); + cleanup_timers_list(++head, cputime_to_expires(utime)); + cleanup_timers_list(++head, sum_exec_runtime); } /* @@ -516,17 +453,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } -static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) +static void clear_dead_task(struct k_itimer *itimer, unsigned long long now) { + struct cpu_timer_list *timer = &itimer->it.cpu; + /* * That's all for this thread or process. * We leave our residual in expires to be reported. */ - put_task_struct(timer->it.cpu.task); - timer->it.cpu.task = NULL; - timer->it.cpu.expires = cpu_time_sub(timer->it_clock, - timer->it.cpu.expires, - now); + put_task_struct(timer->task); + timer->task = NULL; + if (timer->expires < now) { + timer->expires = 0; + } else { + timer->expires -= now; + } } static inline int expires_gt(cputime_t expires, cputime_t new_exp) @@ -558,14 +499,14 @@ static void arm_timer(struct k_itimer *timer) listpos = head; list_for_each_entry(next, head, entry) { - if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) + if (nt->expires < next->expires) break; listpos = &next->entry; } list_add(&nt->entry, listpos); if (listpos == head) { - union cpu_time_count *exp = &nt->expires; + unsigned long long exp = nt->expires; /* * We are the new earliest-expiring POSIX 1.b timer, hence @@ -576,17 +517,17 @@ static void arm_timer(struct k_itimer *timer) switch (CPUCLOCK_WHICH(timer->it_clock)) { case CPUCLOCK_PROF: - if (expires_gt(cputime_expires->prof_exp, exp->cpu)) - cputime_expires->prof_exp = exp->cpu; + if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) + cputime_expires->prof_exp = expires_to_cputime(exp); break; case CPUCLOCK_VIRT: - if (expires_gt(cputime_expires->virt_exp, exp->cpu)) - cputime_expires->virt_exp = exp->cpu; + if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) + cputime_expires->virt_exp = expires_to_cputime(exp); break; case CPUCLOCK_SCHED: if (cputime_expires->sched_exp == 0 || - cputime_expires->sched_exp > exp->sched) - cputime_expires->sched_exp = exp->sched; + cputime_expires->sched_exp > exp) + cputime_expires->sched_exp = exp; break; } } @@ -601,20 +542,20 @@ static void cpu_timer_fire(struct k_itimer *timer) /* * User don't want any signal. */ - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; } else if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. */ wake_up_process(timer->it_process); - timer->it.cpu.expires.sched = 0; - } else if (timer->it.cpu.incr.sched == 0) { + timer->it.cpu.expires = 0; + } else if (timer->it.cpu.incr == 0) { /* * One-shot timer. Clear it as soon as it's fired. */ posix_timer_event(timer, 0); - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { /* * The signal did not get queued because the signal @@ -632,7 +573,7 @@ static void cpu_timer_fire(struct k_itimer *timer) */ static int cpu_timer_sample_group(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { struct task_cputime cputime; @@ -641,13 +582,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock, default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = cputime.utime + cputime.stime; + *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: - cpu->cpu = cputime.utime; + *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + *sample = cputime.sum_exec_runtime + task_delta_exec(p); break; } return 0; @@ -694,7 +635,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, struct itimerspec *new, struct itimerspec *old) { struct task_struct *p = timer->it.cpu.task; - union cpu_time_count old_expires, new_expires, old_incr, val; + unsigned long long old_expires, new_expires, old_incr, val; int ret; if (unlikely(p == NULL)) { @@ -749,7 +690,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, } if (old) { - if (old_expires.sched == 0) { + if (old_expires == 0) { old->it_value.tv_sec = 0; old->it_value.tv_nsec = 0; } else { @@ -764,11 +705,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * new setting. */ bump_cpu_timer(timer, val); - if (cpu_time_before(timer->it_clock, val, - timer->it.cpu.expires)) { - old_expires = cpu_time_sub( - timer->it_clock, - timer->it.cpu.expires, val); + if (val < timer->it.cpu.expires) { + old_expires = timer->it.cpu.expires - val; sample_to_timespec(timer->it_clock, old_expires, &old->it_value); @@ -791,8 +729,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, goto out; } - if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { - cpu_time_add(timer->it_clock, &new_expires, val); + if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { + new_expires += val; } /* @@ -801,8 +739,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * arm the timer (we'll just fake it for timer_gettime). */ timer->it.cpu.expires = new_expires; - if (new_expires.sched != 0 && - cpu_time_before(timer->it_clock, val, new_expires)) { + if (new_expires != 0 && val < new_expires) { arm_timer(timer); } @@ -826,8 +763,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, timer->it_overrun_last = 0; timer->it_overrun = -1; - if (new_expires.sched != 0 && - !cpu_time_before(timer->it_clock, val, new_expires)) { + if (new_expires != 0 && !(val < new_expires)) { /* * The designated time already passed, so we notify * immediately, even if the thread never runs to @@ -849,7 +785,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) { - union cpu_time_count now; + unsigned long long now; struct task_struct *p = timer->it.cpu.task; int clear_dead; @@ -859,7 +795,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) sample_to_timespec(timer->it_clock, timer->it.cpu.incr, &itp->it_interval); - if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ + if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; return; } @@ -891,7 +827,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) */ put_task_struct(p); timer->it.cpu.task = NULL; - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; read_unlock(&tasklist_lock); goto dead; } else { @@ -912,10 +848,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) goto dead; } - if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { + if (now < timer->it.cpu.expires) { sample_to_timespec(timer->it_clock, - cpu_time_sub(timer->it_clock, - timer->it.cpu.expires, now), + timer->it.cpu.expires - now, &itp->it_value); } else { /* @@ -927,6 +862,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) } } +static unsigned long long +check_timers_list(struct list_head *timers, + struct list_head *firing, + unsigned long long curr) +{ + int maxfire = 20; + + while (!list_empty(timers)) { + struct cpu_timer_list *t; + + t = list_first_entry(timers, struct cpu_timer_list, entry); + + if (!--maxfire || curr < t->expires) + return t->expires; + + t->firing = 1; + list_move_tail(&t->entry, firing); + } + + return 0; +} + /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the @@ -935,54 +892,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { - int maxfire; struct list_head *timers = tsk->cpu_timers; struct signal_struct *const sig = tsk->signal; + struct task_cputime *tsk_expires = &tsk->cputime_expires; + unsigned long long expires; unsigned long soft; - maxfire = 20; - tsk->cputime_expires.prof_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) { - tsk->cputime_expires.prof_exp = t->expires.cpu; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + expires = check_timers_list(timers, firing, prof_ticks(tsk)); + tsk_expires->prof_exp = expires_to_cputime(expires); - ++timers; - maxfire = 20; - tsk->cputime_expires.virt_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) { - tsk->cputime_expires.virt_exp = t->expires.cpu; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + expires = check_timers_list(++timers, firing, virt_ticks(tsk)); + tsk_expires->virt_exp = expires_to_cputime(expires); - ++timers; - maxfire = 20; - tsk->cputime_expires.sched_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { - tsk->cputime_expires.sched_exp = t->expires.sched; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + tsk_expires->sched_exp = check_timers_list(++timers, firing, + tsk->se.sum_exec_runtime); /* * Check for the special case thread timers. @@ -1030,7 +953,8 @@ static void stop_process_timers(struct signal_struct *sig) static u32 onecputick; static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, - cputime_t *expires, cputime_t cur_time, int signo) + unsigned long long *expires, + unsigned long long cur_time, int signo) { if (!it->expires) return; @@ -1066,9 +990,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { - int maxfire; struct signal_struct *const sig = tsk->signal; - cputime_t utime, ptime, virt_expires, prof_expires; + unsigned long long utime, ptime, virt_expires, prof_expires; unsigned long long sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; struct task_cputime cputime; @@ -1078,52 +1001,13 @@ static void check_process_timers(struct task_struct *tsk, * Collect the current process totals. */ thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - ptime = utime + cputime.stime; + utime = cputime_to_expires(cputime.utime); + ptime = utime + cputime_to_expires(cputime.stime); sum_sched_runtime = cputime.sum_exec_runtime; - maxfire = 20; - prof_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || ptime < tl->expires.cpu) { - prof_expires = tl->expires.cpu; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } - ++timers; - maxfire = 20; - virt_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || utime < tl->expires.cpu) { - virt_expires = tl->expires.cpu; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } - - ++timers; - maxfire = 20; - sched_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || sum_sched_runtime < tl->expires.sched) { - sched_expires = tl->expires.sched; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } + prof_expires = check_timers_list(timers, firing, ptime); + virt_expires = check_timers_list(++timers, firing, utime); + sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); /* * Check for the special case process timers. @@ -1162,8 +1046,8 @@ static void check_process_timers(struct task_struct *tsk, } } - sig->cputime_expires.prof_exp = prof_expires; - sig->cputime_expires.virt_exp = virt_expires; + sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); + sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); sig->cputime_expires.sched_exp = sched_expires; if (task_cputime_zero(&sig->cputime_expires)) stop_process_timers(sig); @@ -1176,7 +1060,7 @@ static void check_process_timers(struct task_struct *tsk, void posix_cpu_timer_schedule(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; - union cpu_time_count now; + unsigned long long now; if (unlikely(p == NULL)) /* @@ -1205,7 +1089,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ put_task_struct(p); timer->it.cpu.task = p = NULL; - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; goto out_unlock; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { /* @@ -1213,6 +1097,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) * not yet reaped. Take this opportunity to * drop our task ref. */ + cpu_timer_sample_group(timer->it_clock, p, &now); clear_dead_task(timer, now); goto out_unlock; } @@ -1387,7 +1272,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) { - union cpu_time_count now; + unsigned long long now; BUG_ON(clock_idx == CPUCLOCK_SCHED); cpu_timer_sample_group(clock_idx, tsk, &now); @@ -1399,17 +1284,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, * it to be absolute. */ if (*oldval) { - if (*oldval <= now.cpu) { + if (*oldval <= now) { /* Just about to fire. */ *oldval = cputime_one_jiffy; } else { - *oldval -= now.cpu; + *oldval -= now; } } if (!*newval) goto out; - *newval += now.cpu; + *newval += now; } /* @@ -1459,7 +1344,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, } while (!signal_pending(current)) { - if (timer.it.cpu.expires.sched == 0) { + if (timer.it.cpu.expires == 0) { /* * Our timer fired and was reset, below * deletion can not fail. diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 17d7065c3872..5aef494fc8b4 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -162,6 +162,39 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) */ /** + * cputimer_running - return true if cputimer is running + * + * @tsk: Pointer to target task. + */ +static inline bool cputimer_running(struct task_struct *tsk) + +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + + if (!cputimer->running) + return false; + + /* + * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime + * in __exit_signal(), we won't account to the signal struct further + * cputime consumed by that task, even though the task can still be + * ticking after __exit_signal(). + * + * In order to keep a consistent behaviour between thread group cputime + * and thread group cputimer accounting, lets also ignore the cputime + * elapsing after __exit_signal() in any thread group timer running. + * + * This makes sure that POSIX CPU clocks and timers are synchronized, so + * that a POSIX CPU timer won't expire while the corresponding POSIX CPU + * clock delta is behind the expiring timer value. + */ + if (unlikely(!tsk->sighand)) + return false; + + return true; +} + +/** * account_group_user_time - Maintain utime for a thread group. * * @tsk: Pointer to task structure. @@ -176,7 +209,7 @@ static inline void account_group_user_time(struct task_struct *tsk, { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - if (!cputimer->running) + if (!cputimer_running(tsk)) return; raw_spin_lock(&cputimer->lock); @@ -199,7 +232,7 @@ static inline void account_group_system_time(struct task_struct *tsk, { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - if (!cputimer->running) + if (!cputimer_running(tsk)) return; raw_spin_lock(&cputimer->lock); @@ -222,7 +255,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - if (!cputimer->running) + if (!cputimer_running(tsk)) return; raw_spin_lock(&cputimer->lock); diff --git a/kernel/time/Makefile b/kernel/time/Makefile index ff7d9d2ab504..9250130646f5 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -4,6 +4,8 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o +obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o +obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f11d83b12949..eec50fcef9e4 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -199,6 +199,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } +ktime_t alarm_expires_remaining(const struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + return ktime_sub(alarm->node.expires, base->gettime()); +} +EXPORT_SYMBOL_GPL(alarm_expires_remaining); + #ifdef CONFIG_RTC_CLASS /** * alarmtimer_suspend - Suspend time callback @@ -303,9 +310,10 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, alarm->type = type; alarm->state = ALARMTIMER_STATE_INACTIVE; } +EXPORT_SYMBOL_GPL(alarm_init); /** - * alarm_start - Sets an alarm to fire + * alarm_start - Sets an absolute alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm */ @@ -323,6 +331,34 @@ int alarm_start(struct alarm *alarm, ktime_t start) spin_unlock_irqrestore(&base->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(alarm_start); + +/** + * alarm_start_relative - Sets a relative alarm to fire + * @alarm: ptr to alarm to set + * @start: time relative to now to run the alarm + */ +int alarm_start_relative(struct alarm *alarm, ktime_t start) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + + start = ktime_add(start, base->gettime()); + return alarm_start(alarm, start); +} +EXPORT_SYMBOL_GPL(alarm_start_relative); + +void alarm_restart(struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + hrtimer_set_expires(&alarm->timer, alarm->node.expires); + hrtimer_restart(&alarm->timer); + alarmtimer_enqueue(base, alarm); + spin_unlock_irqrestore(&base->lock, flags); +} +EXPORT_SYMBOL_GPL(alarm_restart); /** * alarm_try_to_cancel - Tries to cancel an alarm timer @@ -344,6 +380,7 @@ int alarm_try_to_cancel(struct alarm *alarm) spin_unlock_irqrestore(&base->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(alarm_try_to_cancel); /** @@ -361,6 +398,7 @@ int alarm_cancel(struct alarm *alarm) cpu_relax(); } } +EXPORT_SYMBOL_GPL(alarm_cancel); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) @@ -393,8 +431,15 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) alarm->node.expires = ktime_add(alarm->node.expires, interval); return overrun; } +EXPORT_SYMBOL_GPL(alarm_forward); +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + return alarm_forward(alarm, base->gettime(), interval); +} +EXPORT_SYMBOL_GPL(alarm_forward_now); /** diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c6d6400ee137..38959c866789 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -15,20 +15,23 @@ #include <linux/hrtimer.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/notifier.h> #include <linux/smp.h> +#include <linux/device.h> #include "tick-internal.h" /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); - -/* Notification for clock events */ -static RAW_NOTIFIER_HEAD(clockevents_chain); - /* Protection for the above */ static DEFINE_RAW_SPINLOCK(clockevents_lock); +/* Protection for unbind operations */ +static DEFINE_MUTEX(clockevents_mutex); + +struct ce_unbind { + struct clock_event_device *ce; + int res; +}; /** * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds @@ -232,47 +235,107 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, return (rc && force) ? clockevents_program_min_delta(dev) : rc; } -/** - * clockevents_register_notifier - register a clock events change listener +/* + * Called after a notify add to make devices available which were + * released from the notifier call. */ -int clockevents_register_notifier(struct notifier_block *nb) +static void clockevents_notify_released(void) { - unsigned long flags; - int ret; + struct clock_event_device *dev; - raw_spin_lock_irqsave(&clockevents_lock, flags); - ret = raw_notifier_chain_register(&clockevents_chain, nb); - raw_spin_unlock_irqrestore(&clockevents_lock, flags); + while (!list_empty(&clockevents_released)) { + dev = list_entry(clockevents_released.next, + struct clock_event_device, list); + list_del(&dev->list); + list_add(&dev->list, &clockevent_devices); + tick_check_new_device(dev); + } +} - return ret; +/* + * Try to install a replacement clock event device + */ +static int clockevents_replace(struct clock_event_device *ced) +{ + struct clock_event_device *dev, *newdev = NULL; + + list_for_each_entry(dev, &clockevent_devices, list) { + if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) + continue; + + if (!tick_check_replacement(newdev, dev)) + continue; + + if (!try_module_get(dev->owner)) + continue; + + if (newdev) + module_put(newdev->owner); + newdev = dev; + } + if (newdev) { + tick_install_replacement(newdev); + list_del_init(&ced->list); + } + return newdev ? 0 : -EBUSY; } /* - * Notify about a clock event change. Called with clockevents_lock - * held. + * Called with clockevents_mutex and clockevents_lock held */ -static void clockevents_do_notify(unsigned long reason, void *dev) +static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) { - raw_notifier_call_chain(&clockevents_chain, reason, dev); + /* Fast track. Device is unused */ + if (ced->mode == CLOCK_EVT_MODE_UNUSED) { + list_del_init(&ced->list); + return 0; + } + + return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY; } /* - * Called after a notify add to make devices available which were - * released from the notifier call. + * SMP function call to unbind a device */ -static void clockevents_notify_released(void) +static void __clockevents_unbind(void *arg) { - struct clock_event_device *dev; + struct ce_unbind *cu = arg; + int res; + + raw_spin_lock(&clockevents_lock); + res = __clockevents_try_unbind(cu->ce, smp_processor_id()); + if (res == -EAGAIN) + res = clockevents_replace(cu->ce); + cu->res = res; + raw_spin_unlock(&clockevents_lock); +} - while (!list_empty(&clockevents_released)) { - dev = list_entry(clockevents_released.next, - struct clock_event_device, list); - list_del(&dev->list); - list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); - } +/* + * Issues smp function call to unbind a per cpu device. Called with + * clockevents_mutex held. + */ +static int clockevents_unbind(struct clock_event_device *ced, int cpu) +{ + struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; + + smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); + return cu.res; } +/* + * Unbind a clockevents device. + */ +int clockevents_unbind_device(struct clock_event_device *ced, int cpu) +{ + int ret; + + mutex_lock(&clockevents_mutex); + ret = clockevents_unbind(ced, cpu); + mutex_unlock(&clockevents_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(clockevents_unbind); + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -290,7 +353,7 @@ void clockevents_register_device(struct clock_event_device *dev) raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); clockevents_notify_released(); raw_spin_unlock_irqrestore(&clockevents_lock, flags); @@ -386,6 +449,7 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { + module_put(old->owner); clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); @@ -433,10 +497,36 @@ void clockevents_notify(unsigned long reason, void *arg) int cpu; raw_spin_lock_irqsave(&clockevents_lock, flags); - clockevents_do_notify(reason, arg); switch (reason) { + case CLOCK_EVT_NOTIFY_BROADCAST_ON: + case CLOCK_EVT_NOTIFY_BROADCAST_OFF: + case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + tick_broadcast_on_off(reason, arg); + break; + + case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: + case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: + tick_broadcast_oneshot_control(reason); + break; + + case CLOCK_EVT_NOTIFY_CPU_DYING: + tick_handover_do_timer(arg); + break; + + case CLOCK_EVT_NOTIFY_SUSPEND: + tick_suspend(); + tick_suspend_broadcast(); + break; + + case CLOCK_EVT_NOTIFY_RESUME: + tick_resume(); + break; + case CLOCK_EVT_NOTIFY_CPU_DEAD: + tick_shutdown_broadcast_oneshot(arg); + tick_shutdown_broadcast(arg); + tick_shutdown(arg); /* * Unregister the clock event devices which were * released from the users in the notify chain. @@ -462,4 +552,123 @@ void clockevents_notify(unsigned long reason, void *arg) raw_spin_unlock_irqrestore(&clockevents_lock, flags); } EXPORT_SYMBOL_GPL(clockevents_notify); + +#ifdef CONFIG_SYSFS +struct bus_type clockevents_subsys = { + .name = "clockevents", + .dev_name = "clockevent", +}; + +static DEFINE_PER_CPU(struct device, tick_percpu_dev); +static struct tick_device *tick_get_tick_dev(struct device *dev); + +static ssize_t sysfs_show_current_tick_dev(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct tick_device *td; + ssize_t count = 0; + + raw_spin_lock_irq(&clockevents_lock); + td = tick_get_tick_dev(dev); + if (td && td->evtdev) + count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); + raw_spin_unlock_irq(&clockevents_lock); + return count; +} +static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL); + +/* We don't support the abomination of removable broadcast devices */ +static ssize_t sysfs_unbind_tick_dev(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char name[CS_NAME_LEN]; + size_t ret = sysfs_get_uname(buf, name, count); + struct clock_event_device *ce; + + if (ret < 0) + return ret; + + ret = -ENODEV; + mutex_lock(&clockevents_mutex); + raw_spin_lock_irq(&clockevents_lock); + list_for_each_entry(ce, &clockevent_devices, list) { + if (!strcmp(ce->name, name)) { + ret = __clockevents_try_unbind(ce, dev->id); + break; + } + } + raw_spin_unlock_irq(&clockevents_lock); + /* + * We hold clockevents_mutex, so ce can't go away + */ + if (ret == -EAGAIN) + ret = clockevents_unbind(ce, dev->id); + mutex_unlock(&clockevents_mutex); + return ret ? ret : count; +} +static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev); + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +static struct device tick_bc_dev = { + .init_name = "broadcast", + .id = 0, + .bus = &clockevents_subsys, +}; + +static struct tick_device *tick_get_tick_dev(struct device *dev) +{ + return dev == &tick_bc_dev ? tick_get_broadcast_device() : + &per_cpu(tick_cpu_device, dev->id); +} + +static __init int tick_broadcast_init_sysfs(void) +{ + int err = device_register(&tick_bc_dev); + + if (!err) + err = device_create_file(&tick_bc_dev, &dev_attr_current_device); + return err; +} +#else +static struct tick_device *tick_get_tick_dev(struct device *dev) +{ + return &per_cpu(tick_cpu_device, dev->id); +} +static inline int tick_broadcast_init_sysfs(void) { return 0; } #endif + +static int __init tick_init_sysfs(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct device *dev = &per_cpu(tick_percpu_dev, cpu); + int err; + + dev->id = cpu; + dev->bus = &clockevents_subsys; + err = device_register(dev); + if (!err) + err = device_create_file(dev, &dev_attr_current_device); + if (!err) + err = device_create_file(dev, &dev_attr_unbind_device); + if (err) + return err; + } + return tick_broadcast_init_sysfs(); +} + +static int __init clockevents_init_sysfs(void) +{ + int err = subsys_system_register(&clockevents_subsys, NULL); + + if (!err) + err = tick_init_sysfs(); + return err; +} +device_initcall(clockevents_init_sysfs); +#endif /* SYSFS */ + +#endif /* GENERIC_CLOCK_EVENTS */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c9583382141a..50a8736757f3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,6 +31,8 @@ #include <linux/tick.h> #include <linux/kthread.h> +#include "tick-internal.h" + void timecounter_init(struct timecounter *tc, const struct cyclecounter *cc, u64 start_tstamp) @@ -174,11 +176,12 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) static struct clocksource *curr_clocksource; static LIST_HEAD(clocksource_list); static DEFINE_MUTEX(clocksource_mutex); -static char override_name[32]; +static char override_name[CS_NAME_LEN]; static int finished_booting; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); +static void clocksource_select(void); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; @@ -299,13 +302,30 @@ static void clocksource_watchdog(unsigned long data) if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { + /* Mark it valid for high-res. */ cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; + + /* + * clocksource_done_booting() will sort it if + * finished_booting is not set yet. + */ + if (!finished_booting) + continue; + /* - * We just marked the clocksource as highres-capable, - * notify the rest of the system as well so that we - * transition into high-res mode: + * If this is not the current clocksource let + * the watchdog thread reselect it. Due to the + * change to high res this clocksource might + * be preferred now. If it is the current + * clocksource let the tick code know about + * that change. */ - tick_clock_notify(); + if (cs != curr_clocksource) { + cs->flags |= CLOCK_SOURCE_RESELECT; + schedule_work(&watchdog_work); + } else { + tick_clock_notify(); + } } } @@ -388,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) static void clocksource_dequeue_watchdog(struct clocksource *cs) { - struct clocksource *tmp; unsigned long flags; spin_lock_irqsave(&watchdog_lock, flags); - if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { - /* cs is a watched clocksource. */ - list_del_init(&cs->wd_list); - } else if (cs == watchdog) { - /* Reset watchdog cycles */ - clocksource_reset_watchdog(); - /* Current watchdog is removed. Find an alternative. */ - watchdog = NULL; - list_for_each_entry(tmp, &clocksource_list, list) { - if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) - continue; - if (!watchdog || tmp->rating > watchdog->rating) - watchdog = tmp; + if (cs != watchdog) { + if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { + /* cs is a watched clocksource. */ + list_del_init(&cs->wd_list); + /* Check if the watchdog timer needs to be stopped. */ + clocksource_stop_watchdog(); } } - cs->flags &= ~CLOCK_SOURCE_WATCHDOG; - /* Check if the watchdog timer needs to be stopped. */ - clocksource_stop_watchdog(); spin_unlock_irqrestore(&watchdog_lock, flags); } -static int clocksource_watchdog_kthread(void *data) +static int __clocksource_watchdog_kthread(void) { struct clocksource *cs, *tmp; unsigned long flags; LIST_HEAD(unstable); + int select = 0; - mutex_lock(&clocksource_mutex); spin_lock_irqsave(&watchdog_lock, flags); - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { if (cs->flags & CLOCK_SOURCE_UNSTABLE) { list_del_init(&cs->wd_list); list_add(&cs->wd_list, &unstable); + select = 1; } + if (cs->flags & CLOCK_SOURCE_RESELECT) { + cs->flags &= ~CLOCK_SOURCE_RESELECT; + select = 1; + } + } /* Check if the watchdog timer needs to be stopped. */ clocksource_stop_watchdog(); spin_unlock_irqrestore(&watchdog_lock, flags); @@ -435,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data) list_del_init(&cs->wd_list); __clocksource_change_rating(cs, 0); } + return select; +} + +static int clocksource_watchdog_kthread(void *data) +{ + mutex_lock(&clocksource_mutex); + if (__clocksource_watchdog_kthread()) + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } +static bool clocksource_is_watchdog(struct clocksource *cs) +{ + return cs == watchdog; +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) @@ -449,7 +477,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } static inline void clocksource_resume_watchdog(void) { } -static inline int clocksource_watchdog_kthread(void *data) { return 0; } +static inline int __clocksource_watchdog_kthread(void) { return 0; } +static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ @@ -553,24 +582,42 @@ static u64 clocksource_max_deferment(struct clocksource *cs) #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET -/** - * clocksource_select - Select the best clocksource available - * - * Private function. Must hold clocksource_mutex when called. - * - * Select the clocksource with the best rating, or the clocksource, - * which is selected by userspace override. - */ -static void clocksource_select(void) +static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) { - struct clocksource *best, *cs; + struct clocksource *cs; if (!finished_booting || list_empty(&clocksource_list)) + return NULL; + + /* + * We pick the clocksource with the highest rating. If oneshot + * mode is active, we pick the highres valid clocksource with + * the best rating. + */ + list_for_each_entry(cs, &clocksource_list, list) { + if (skipcur && cs == curr_clocksource) + continue; + if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES)) + continue; + return cs; + } + return NULL; +} + +static void __clocksource_select(bool skipcur) +{ + bool oneshot = tick_oneshot_mode_active(); + struct clocksource *best, *cs; + + /* Find the best suitable clocksource */ + best = clocksource_find_best(oneshot, skipcur); + if (!best) return; - /* First clocksource on the list has the best rating. */ - best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ list_for_each_entry(cs, &clocksource_list, list) { + if (skipcur && cs == curr_clocksource) + continue; if (strcmp(cs->name, override_name) != 0) continue; /* @@ -578,8 +625,7 @@ static void clocksource_select(void) * capable clocksource if the tick code is in oneshot * mode (highres or nohz) */ - if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && - tick_oneshot_mode_active()) { + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) { /* Override clocksource cannot be used. */ printk(KERN_WARNING "Override clocksource %s is not " "HRT compatible. Cannot switch while in " @@ -590,16 +636,35 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) { - printk(KERN_INFO "Switching to clocksource %s\n", best->name); + + if (curr_clocksource != best && !timekeeping_notify(best)) { + pr_info("Switched to clocksource %s\n", best->name); curr_clocksource = best; - timekeeping_notify(curr_clocksource); } } +/** + * clocksource_select - Select the best clocksource available + * + * Private function. Must hold clocksource_mutex when called. + * + * Select the clocksource with the best rating, or the clocksource, + * which is selected by userspace override. + */ +static void clocksource_select(void) +{ + return __clocksource_select(false); +} + +static void clocksource_select_fallback(void) +{ + return __clocksource_select(true); +} + #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ static inline void clocksource_select(void) { } +static inline void clocksource_select_fallback(void) { } #endif @@ -614,16 +679,11 @@ static int __init clocksource_done_booting(void) { mutex_lock(&clocksource_mutex); curr_clocksource = clocksource_default_clock(); - mutex_unlock(&clocksource_mutex); - finished_booting = 1; - /* * Run the watchdog first to eliminate unstable clock sources */ - clocksource_watchdog_kthread(NULL); - - mutex_lock(&clocksource_mutex); + __clocksource_watchdog_kthread(); clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; @@ -756,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - clocksource_select(); } /** @@ -768,21 +827,47 @@ void clocksource_change_rating(struct clocksource *cs, int rating) { mutex_lock(&clocksource_mutex); __clocksource_change_rating(cs, rating); + clocksource_select(); mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); +/* + * Unbind clocksource @cs. Called with clocksource_mutex held + */ +static int clocksource_unbind(struct clocksource *cs) +{ + /* + * I really can't convince myself to support this on hardware + * designed by lobotomized monkeys. + */ + if (clocksource_is_watchdog(cs)) + return -EBUSY; + + if (cs == curr_clocksource) { + /* Select and try to install a replacement clock source */ + clocksource_select_fallback(); + if (curr_clocksource == cs) + return -EBUSY; + } + clocksource_dequeue_watchdog(cs); + list_del_init(&cs->list); + return 0; +} + /** * clocksource_unregister - remove a registered clocksource * @cs: clocksource to be unregistered */ -void clocksource_unregister(struct clocksource *cs) +int clocksource_unregister(struct clocksource *cs) { + int ret = 0; + mutex_lock(&clocksource_mutex); - clocksource_dequeue_watchdog(cs); - list_del(&cs->list); - clocksource_select(); + if (!list_empty(&cs->list)) + ret = clocksource_unbind(cs); mutex_unlock(&clocksource_mutex); + return ret; } EXPORT_SYMBOL(clocksource_unregister); @@ -808,6 +893,23 @@ sysfs_show_current_clocksources(struct device *dev, return count; } +size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) +{ + size_t ret = cnt; + + /* strings from sysfs write are not 0 terminated! */ + if (!cnt || cnt >= CS_NAME_LEN) + return -EINVAL; + + /* strip of \n: */ + if (buf[cnt-1] == '\n') + cnt--; + if (cnt > 0) + memcpy(dst, buf, cnt); + dst[cnt] = 0; + return ret; +} + /** * sysfs_override_clocksource - interface for manually overriding clocksource * @dev: unused @@ -822,22 +924,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - size_t ret = count; - - /* strings from sysfs write are not 0 terminated! */ - if (count >= sizeof(override_name)) - return -EINVAL; - - /* strip of \n: */ - if (buf[count-1] == '\n') - count--; + size_t ret; mutex_lock(&clocksource_mutex); - if (count > 0) - memcpy(override_name, buf, count); - override_name[count] = 0; - clocksource_select(); + ret = sysfs_get_uname(buf, override_name, count); + if (ret >= 0) + clocksource_select(); mutex_unlock(&clocksource_mutex); @@ -845,6 +938,40 @@ static ssize_t sysfs_override_clocksource(struct device *dev, } /** + * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource + * @dev: unused + * @attr: unused + * @buf: unused + * @count: length of buffer + * + * Takes input from sysfs interface for manually unbinding a clocksource. + */ +static ssize_t sysfs_unbind_clocksource(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct clocksource *cs; + char name[CS_NAME_LEN]; + size_t ret; + + ret = sysfs_get_uname(buf, name, count); + if (ret < 0) + return ret; + + ret = -ENODEV; + mutex_lock(&clocksource_mutex); + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, name)) + continue; + ret = clocksource_unbind(cs); + break; + } + mutex_unlock(&clocksource_mutex); + + return ret ? ret : count; +} + +/** * sysfs_show_available_clocksources - sysfs interface for listing clocksource * @dev: unused * @attr: unused @@ -886,6 +1013,8 @@ sysfs_show_available_clocksources(struct device *dev, static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, sysfs_override_clocksource); +static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource); + static DEVICE_ATTR(available_clocksource, 0444, sysfs_show_available_clocksources, NULL); @@ -910,6 +1039,9 @@ static int __init init_clocksource_sysfs(void) &device_clocksource, &dev_attr_current_clocksource); if (!error) + error = device_create_file(&device_clocksource, + &dev_attr_unbind_clocksource); + if (!error) error = device_create_file( &device_clocksource, &dev_attr_available_clocksource); diff --git a/arch/arm/kernel/sched_clock.c b/kernel/time/sched_clock.c index e8edcaa0e432..a326f27d7f09 100644 --- a/arch/arm/kernel/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,8 +13,7 @@ #include <linux/sched.h> #include <linux/syscore_ops.h> #include <linux/timer.h> - -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> struct clock_data { u64 epoch_ns; @@ -24,7 +23,6 @@ struct clock_data { u32 mult; u32 shift; bool suspended; - bool needs_suspend; }; static void sched_clock_poll(unsigned long wrap_ticks); @@ -51,10 +49,11 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) return (cyc * mult) >> shift; } -static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) +static unsigned long long notrace sched_clock_32(void) { u64 epoch_ns; u32 epoch_cyc; + u32 cyc; if (cd.suspended) return cd.epoch_ns; @@ -73,7 +72,9 @@ static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) smp_rmb(); } while (epoch_cyc != cd.epoch_cyc_copy); - return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift); + cyc = read_sched_clock(); + cyc = (cyc - epoch_cyc) & sched_clock_mask; + return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); } /* @@ -165,12 +166,6 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pF as sched_clock source\n", read); } -static unsigned long long notrace sched_clock_32(void) -{ - u32 cyc = read_sched_clock(); - return cyc_to_sched_clock(cyc, sched_clock_mask); -} - unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; unsigned long long notrace sched_clock(void) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 20d6fba70652..6d3f91631de6 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -19,6 +19,7 @@ #include <linux/profile.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/module.h> #include "tick-internal.h" @@ -29,6 +30,7 @@ static struct tick_device tick_broadcast_device; static cpumask_var_t tick_broadcast_mask; +static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); static int tick_broadcast_force; @@ -64,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) /* * Check, if the device can be utilized as broadcast device: */ -int tick_check_broadcast_device(struct clock_event_device *dev) +static bool tick_check_broadcast_device(struct clock_event_device *curdev, + struct clock_event_device *newdev) +{ + if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || + (newdev->features & CLOCK_EVT_FEAT_C3STOP)) + return false; + + if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT && + !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) + return false; + + return !curdev || newdev->rating > curdev->rating; +} + +/* + * Conditionally install/replace broadcast device + */ +void tick_install_broadcast_device(struct clock_event_device *dev) { struct clock_event_device *cur = tick_broadcast_device.evtdev; - if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || - (tick_broadcast_device.evtdev && - tick_broadcast_device.evtdev->rating >= dev->rating) || - (dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; + if (!tick_check_broadcast_device(cur, dev)) + return; - clockevents_exchange_device(tick_broadcast_device.evtdev, dev); + if (!try_module_get(dev->owner)) + return; + + clockevents_exchange_device(cur, dev); if (cur) cur->event_handler = clockevents_handle_noop; tick_broadcast_device.evtdev = dev; @@ -90,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev) */ if (dev->features & CLOCK_EVT_FEAT_ONESHOT) tick_clock_notify(); - return 1; } /* @@ -123,8 +141,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev) */ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret = 0; + int ret; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -138,20 +157,59 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); - tick_broadcast_start_periodic(tick_broadcast_device.evtdev); + tick_broadcast_start_periodic(bc); ret = 1; } else { /* - * When the new device is not affected by the stop - * feature and the cpu is marked in the broadcast mask - * then clear the broadcast bit. + * Clear the broadcast bit for this cpu if the + * device is not power state affected. */ - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { - int cpu = smp_processor_id(); + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) cpumask_clear_cpu(cpu, tick_broadcast_mask); - tick_broadcast_clear_oneshot(cpu); - } else { + else tick_device_setup_broadcast_func(dev); + + /* + * Clear the broadcast bit if the CPU is not in + * periodic broadcast on state. + */ + if (!cpumask_test_cpu(cpu, tick_broadcast_on)) + cpumask_clear_cpu(cpu, tick_broadcast_mask); + + switch (tick_broadcast_device.mode) { + case TICKDEV_MODE_ONESHOT: + /* + * If the system is in oneshot mode we can + * unconditionally clear the oneshot mask bit, + * because the CPU is running and therefore + * not in an idle state which causes the power + * state affected device to stop. Let the + * caller initialize the device. + */ + tick_broadcast_clear_oneshot(cpu); + ret = 0; + break; + + case TICKDEV_MODE_PERIODIC: + /* + * If the system is in periodic mode, check + * whether the broadcast device can be + * switched off now. + */ + if (cpumask_empty(tick_broadcast_mask) && bc) + clockevents_shutdown(bc); + /* + * If we kept the cpu in the broadcast mask, + * tell the caller to leave the per cpu device + * in shutdown state. The periodic interrupt + * is delivered by the broadcast device. + */ + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + break; + default: + /* Nothing to do */ + ret = 0; + break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); @@ -281,6 +339,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) switch (*reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) @@ -290,8 +349,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason) tick_broadcast_force = 1; break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (!tick_broadcast_force && - cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { + if (tick_broadcast_force) + break; + cpumask_clear_cpu(cpu, tick_broadcast_on); + if (!tick_device_is_functional(dev)) + break; + if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); @@ -349,6 +412,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, tick_broadcast_mask); + cpumask_clear_cpu(cpu, tick_broadcast_on); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { if (bc && cpumask_empty(tick_broadcast_mask)) @@ -475,7 +539,15 @@ void tick_check_oneshot_broadcast(int cpu) if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); + /* + * We might be in the middle of switching over from + * periodic to oneshot. If the CPU has not yet + * switched over, leave the device alone. + */ + if (td->mode == TICKDEV_MODE_ONESHOT) { + clockevents_set_mode(td->evtdev, + CLOCK_EVT_MODE_ONESHOT); + } } } @@ -522,6 +594,13 @@ again: cpumask_clear(tick_broadcast_force_mask); /* + * Sanity check. Catch the case where we try to broadcast to + * offline cpus. + */ + if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) + cpumask_and(tmpmask, tmpmask, cpu_online_mask); + + /* * Wakeup the cpus which have an expired event. */ tick_do_broadcast(tmpmask); @@ -761,10 +840,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* - * Clear the broadcast mask flag for the dead cpu, but do not - * stop the broadcast device! + * Clear the broadcast masks for the dead cpu, but do not stop + * the broadcast device! */ cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); + cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); + cpumask_clear_cpu(cpu, tick_broadcast_force_mask); raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } @@ -792,6 +873,7 @@ bool tick_broadcast_oneshot_available(void) void __init tick_broadcast_init(void) { zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); + zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); #ifdef CONFIG_TICK_ONESHOT zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..64522ecdfe0e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -18,6 +18,7 @@ #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> +#include <linux/module.h> #include <asm/irq_regs.h> @@ -33,7 +34,6 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); ktime_t tick_next_period; ktime_t tick_period; int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; -static DEFINE_RAW_SPINLOCK(tick_device_lock); /* * Debugging: see timer_list.c @@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td, * When global broadcasting is active, check if the current * device is registered as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic - * way. + * way. This function also returns !=0 when we keep the + * current active broadcast state for this CPU. */ if (tick_device_uses_broadcast(newdev, cpu)) return; @@ -205,17 +206,75 @@ static void tick_setup_device(struct tick_device *td, tick_setup_oneshot(newdev, handler, next_event); } +void tick_install_replacement(struct clock_event_device *newdev) +{ + struct tick_device *td = &__get_cpu_var(tick_cpu_device); + int cpu = smp_processor_id(); + + clockevents_exchange_device(td->evtdev, newdev); + tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); + if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) + tick_oneshot_notify(); +} + +static bool tick_check_percpu(struct clock_event_device *curdev, + struct clock_event_device *newdev, int cpu) +{ + if (!cpumask_test_cpu(cpu, newdev->cpumask)) + return false; + if (cpumask_equal(newdev->cpumask, cpumask_of(cpu))) + return true; + /* Check if irq affinity can be set */ + if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq)) + return false; + /* Prefer an existing cpu local device */ + if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) + return false; + return true; +} + +static bool tick_check_preferred(struct clock_event_device *curdev, + struct clock_event_device *newdev) +{ + /* Prefer oneshot capable device */ + if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) { + if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT)) + return false; + if (tick_oneshot_mode_active()) + return false; + } + + /* + * Use the higher rated one, but prefer a CPU local device with a lower + * rating than a non-CPU local device + */ + return !curdev || + newdev->rating > curdev->rating || + !cpumask_equal(curdev->cpumask, newdev->cpumask); +} + +/* + * Check whether the new device is a better fit than curdev. curdev + * can be NULL ! + */ +bool tick_check_replacement(struct clock_event_device *curdev, + struct clock_event_device *newdev) +{ + if (tick_check_percpu(curdev, newdev, smp_processor_id())) + return false; + + return tick_check_preferred(curdev, newdev); +} + /* - * Check, if the new registered device should be used. + * Check, if the new registered device should be used. Called with + * clockevents_lock held and interrupts disabled. */ -static int tick_check_new_device(struct clock_event_device *newdev) +void tick_check_new_device(struct clock_event_device *newdev) { struct clock_event_device *curdev; struct tick_device *td; - int cpu, ret = NOTIFY_OK; - unsigned long flags; - - raw_spin_lock_irqsave(&tick_device_lock, flags); + int cpu; cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, newdev->cpumask)) @@ -225,40 +284,15 @@ static int tick_check_new_device(struct clock_event_device *newdev) curdev = td->evtdev; /* cpu local device ? */ - if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { - - /* - * If the cpu affinity of the device interrupt can not - * be set, ignore it. - */ - if (!irq_can_set_affinity(newdev->irq)) - goto out_bc; + if (!tick_check_percpu(curdev, newdev, cpu)) + goto out_bc; - /* - * If we have a cpu local device already, do not replace it - * by a non cpu local device - */ - if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) - goto out_bc; - } + /* Preference decision */ + if (!tick_check_preferred(curdev, newdev)) + goto out_bc; - /* - * If we have an active device, then check the rating and the oneshot - * feature. - */ - if (curdev) { - /* - * Prefer one shot capable devices ! - */ - if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && - !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) - goto out_bc; - /* - * Check the rating - */ - if (curdev->rating >= newdev->rating) - goto out_bc; - } + if (!try_module_get(newdev->owner)) + return; /* * Replace the eventually existing device by the new @@ -273,20 +307,13 @@ static int tick_check_new_device(struct clock_event_device *newdev) tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); - - raw_spin_unlock_irqrestore(&tick_device_lock, flags); - return NOTIFY_STOP; + return; out_bc: /* * Can the new device be used as a broadcast device ? */ - if (tick_check_broadcast_device(newdev)) - ret = NOTIFY_STOP; - - raw_spin_unlock_irqrestore(&tick_device_lock, flags); - - return ret; + tick_install_broadcast_device(newdev); } /* @@ -294,7 +321,7 @@ out_bc: * * Called with interrupts disabled. */ -static void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(int *cpup) { if (*cpup == tick_do_timer_cpu) { int cpu = cpumask_first(cpu_online_mask); @@ -311,13 +338,11 @@ static void tick_handover_do_timer(int *cpup) * access the hardware device itself. * We just set the mode and remove it from the lists. */ -static void tick_shutdown(unsigned int *cpup) +void tick_shutdown(unsigned int *cpup) { struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); struct clock_event_device *dev = td->evtdev; - unsigned long flags; - raw_spin_lock_irqsave(&tick_device_lock, flags); td->mode = TICKDEV_MODE_PERIODIC; if (dev) { /* @@ -329,26 +354,20 @@ static void tick_shutdown(unsigned int *cpup) dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; } - raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_suspend(void) +void tick_suspend(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); - unsigned long flags; - raw_spin_lock_irqsave(&tick_device_lock, flags); clockevents_shutdown(td->evtdev); - raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_resume(void) +void tick_resume(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); - unsigned long flags; int broadcast = tick_resume_broadcast(); - raw_spin_lock_irqsave(&tick_device_lock, flags); clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); if (!broadcast) { @@ -357,68 +376,12 @@ static void tick_resume(void) else tick_resume_oneshot(); } - raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -/* - * Notification about clock event devices - */ -static int tick_notify(struct notifier_block *nb, unsigned long reason, - void *dev) -{ - switch (reason) { - - case CLOCK_EVT_NOTIFY_ADD: - return tick_check_new_device(dev); - - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_on_off(reason, dev); - break; - - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: - case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_oneshot_control(reason); - break; - - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(dev); - break; - - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(dev); - tick_shutdown_broadcast(dev); - tick_shutdown(dev); - break; - - case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend(); - tick_suspend_broadcast(); - break; - - case CLOCK_EVT_NOTIFY_RESUME: - tick_resume(); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block tick_notifier = { - .notifier_call = tick_notify, -}; - /** * tick_init - initialize the tick control - * - * Register the notifier with the clockevents framework */ void __init tick_init(void) { - clockevents_register_notifier(&tick_notifier); tick_broadcast_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f0299eae4602..bc906cad709b 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -6,6 +6,8 @@ extern seqlock_t jiffies_lock; +#define CS_NAME_LEN 32 + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD #define TICK_DO_TIMER_NONE -1 @@ -18,9 +20,19 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); +extern void tick_check_new_device(struct clock_event_device *dev); +extern void tick_handover_do_timer(int *cpup); +extern void tick_shutdown(unsigned int *cpup); +extern void tick_suspend(void); +extern void tick_resume(void); +extern bool tick_check_replacement(struct clock_event_device *curdev, + struct clock_event_device *newdev); +extern void tick_install_replacement(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); +extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); + /* * NO_HZ / high resolution timer shared code */ @@ -90,7 +102,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; } */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); -extern int tick_check_broadcast_device(struct clock_event_device *dev); +extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); @@ -102,9 +114,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); #else /* !BROADCAST */ -static inline int tick_check_broadcast_device(struct clock_event_device *dev) +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { - return 0; } static inline int tick_is_broadcast_device(struct clock_event_device *dev) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index baeeb5c87cf1..48b9fffabdc2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -25,6 +25,11 @@ #include "tick-internal.h" #include "ntp_internal.h" +#include "timekeeping_internal.h" + +#define TK_CLEAR_NTP (1 << 0) +#define TK_MIRROR (1 << 1) +#define TK_CLOCK_WAS_SET (1 << 2) static struct timekeeper timekeeper; static DEFINE_RAW_SPINLOCK(timekeeper_lock); @@ -200,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); -static void update_pvclock_gtod(struct timekeeper *tk) +static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) { - raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk); + raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); } /** @@ -216,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb) raw_spin_lock_irqsave(&timekeeper_lock, flags); ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); - update_pvclock_gtod(tk); + update_pvclock_gtod(tk, true); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return ret; @@ -241,16 +246,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); /* must hold timekeeper_lock */ -static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror) +static void timekeeping_update(struct timekeeper *tk, unsigned int action) { - if (clearntp) { + if (action & TK_CLEAR_NTP) { tk->ntp_error = 0; ntp_clear(); } update_vsyscall(tk); - update_pvclock_gtod(tk); + update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); - if (mirror) + if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); } @@ -508,7 +513,7 @@ int do_settimeofday(const struct timespec *tv) tk_set_xtime(tk, tv); - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -552,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts) tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); error: /* even if we error out, we forwarded the time, so call update */ - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -627,13 +632,22 @@ static int change_clocksource(void *data) write_seqcount_begin(&timekeeper_seq); timekeeping_forward_now(tk); - if (!new->enable || new->enable(new) == 0) { - old = tk->clock; - tk_setup_internals(tk, new); - if (old->disable) - old->disable(old); + /* + * If the cs is in module, get a module reference. Succeeds + * for built-in code (owner == NULL) as well. + */ + if (try_module_get(new->owner)) { + if (!new->enable || new->enable(new) == 0) { + old = tk->clock; + tk_setup_internals(tk, new); + if (old->disable) + old->disable(old); + module_put(old->owner); + } else { + module_put(new->owner); + } } - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -648,14 +662,15 @@ static int change_clocksource(void *data) * This function is called from clocksource.c after a new, better clock * source has been registered. The caller holds the clocksource_mutex. */ -void timekeeping_notify(struct clocksource *clock) +int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &timekeeper; if (tk->clock == clock) - return; + return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); + return tk->clock == clock ? 0 : -1; } /** @@ -841,6 +856,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, tk_xtime_add(tk, delta); tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); + tk_debug_account_sleep_time(delta); } /** @@ -872,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) __timekeeping_inject_sleeptime(tk, delta); - timekeeping_update(tk, true, true); + timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -954,7 +970,7 @@ static void timekeeping_resume(void) tk->cycle_last = clock->cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; - timekeeping_update(tk, false, true); + timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -1236,9 +1252,10 @@ out_adjust: * It also calls into the NTP code to handle leapsecond processing. * */ -static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) +static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; + unsigned int action = 0; while (tk->xtime_nsec >= nsecps) { int leap; @@ -1261,8 +1278,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); clock_was_set_delayed(); + action = TK_CLOCK_WAS_SET; } } + return action; } /** @@ -1347,6 +1366,7 @@ static void update_wall_time(void) struct timekeeper *tk = &shadow_timekeeper; cycle_t offset; int shift = 0, maxshift; + unsigned int action; unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); @@ -1399,7 +1419,7 @@ static void update_wall_time(void) * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC */ - accumulate_nsecs_to_secs(tk); + action = accumulate_nsecs_to_secs(tk); write_seqcount_begin(&timekeeper_seq); /* Update clock->cycle_last with the new value */ @@ -1415,7 +1435,7 @@ static void update_wall_time(void) * updating. */ memcpy(real_tk, tk, sizeof(*tk)); - timekeeping_update(real_tk, false, false); + timekeeping_update(real_tk, action); write_seqcount_end(&timekeeper_seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -1677,6 +1697,7 @@ int do_adjtimex(struct timex *txc) if (tai != orig_tai) { __timekeeping_set_tai_offset(tk, tai); + update_pvclock_gtod(tk, true); clock_was_set_delayed(); } write_seqcount_end(&timekeeper_seq); diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c new file mode 100644 index 000000000000..802433a4f5eb --- /dev/null +++ b/kernel/time/timekeeping_debug.c @@ -0,0 +1,72 @@ +/* + * debugfs file to track time spent in suspend + * + * Copyright (c) 2011, Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/time.h> + +static unsigned int sleep_time_bin[32] = {0}; + +static int tk_debug_show_sleep_time(struct seq_file *s, void *data) +{ + unsigned int bin; + seq_puts(s, " time (secs) count\n"); + seq_puts(s, "------------------------------\n"); + for (bin = 0; bin < 32; bin++) { + if (sleep_time_bin[bin] == 0) + continue; + seq_printf(s, "%10u - %-10u %4u\n", + bin ? 1 << (bin - 1) : 0, 1 << bin, + sleep_time_bin[bin]); + } + return 0; +} + +static int tk_debug_sleep_time_open(struct inode *inode, struct file *file) +{ + return single_open(file, tk_debug_show_sleep_time, NULL); +} + +static const struct file_operations tk_debug_sleep_time_fops = { + .open = tk_debug_sleep_time_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init tk_debug_sleep_time_init(void) +{ + struct dentry *d; + + d = debugfs_create_file("sleep_time", 0444, NULL, NULL, + &tk_debug_sleep_time_fops); + if (!d) { + pr_err("Failed to create sleep_time debug file\n"); + return -ENOMEM; + } + + return 0; +} +late_initcall(tk_debug_sleep_time_init); + +void tk_debug_account_sleep_time(struct timespec *t) +{ + sleep_time_bin[fls(t->tv_sec)]++; +} + diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h new file mode 100644 index 000000000000..13323ea08ffa --- /dev/null +++ b/kernel/time/timekeeping_internal.h @@ -0,0 +1,14 @@ +#ifndef _TIMEKEEPING_INTERNAL_H +#define _TIMEKEEPING_INTERNAL_H +/* + * timekeeping debug functions + */ +#include <linux/time.h> + +#ifdef CONFIG_DEBUG_FS +extern void tk_debug_account_sleep_time(struct timespec *t); +#else +#define tk_debug_account_sleep_time(x) +#endif + +#endif /* _TIMEKEEPING_INTERNAL_H */ diff --git a/kernel/timer.c b/kernel/timer.c index 15ffdb3f1948..15bc1b41021d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -149,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu, /* now that we have rounded, subtract the extra skew again */ j -= cpu * 3; - if (j <= jiffies) /* rounding ate our timeout entirely; */ - return original; - return j; + /* + * Make sure j is still in the future. Otherwise return the + * unmodified value. + */ + return time_is_after_jiffies(j) ? j : original; } /** diff --git a/lib/Kconfig b/lib/Kconfig index 5a5203ded0dd..f1ed53c3aa44 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -66,6 +66,8 @@ config CRC16 config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" + select CRYPTO + select CRYPTO_CRCT10DIF help This option is only needed if a module that's not in the kernel tree needs to calculate CRC checks for use with the diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index fbbd66ed86cd..fe3428c07b47 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -11,57 +11,44 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-t10dif.h> +#include <linux/err.h> +#include <linux/init.h> +#include <crypto/hash.h> -/* Table generated using the following polynomium: - * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 - * gt: 0x8bb7 - */ -static const __u16 t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; +static struct crypto_shash *crct10dif_tfm; __u16 crc_t10dif(const unsigned char *buffer, size_t len) { - __u16 crc = 0; - unsigned int i; + struct { + struct shash_desc shash; + char ctx[2]; + } desc; + int err; + + desc.shash.tfm = crct10dif_tfm; + desc.shash.flags = 0; + *(__u16 *)desc.ctx = 0; - for (i = 0 ; i < len ; i++) - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + err = crypto_shash_update(&desc.shash, buffer, len); + BUG_ON(err); - return crc; + return *(__u16 *)desc.ctx; } EXPORT_SYMBOL(crc_t10dif); +static int __init crc_t10dif_mod_init(void) +{ + crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); + return PTR_RET(crct10dif_tfm); +} + +static void __exit crc_t10dif_mod_fini(void) +{ + crypto_free_shash(crct10dif_tfm); +} + +module_init(crc_t10dif_mod_init); +module_exit(crc_t10dif_mod_fini); + MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 3d155dd27eb6..62164348ecf7 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -861,24 +861,23 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_DATA_SECTIONS \ - ".init.setup$", ".init.rodata$", \ - ".cpuinit.rodata$", ".meminit.rodata$", \ - ".init.data$", ".cpuinit.data$", ".meminit.data$" + ".init.setup$", ".init.rodata$", ".meminit.rodata$", \ + ".init.data$", ".meminit.data$" #define ALL_EXIT_DATA_SECTIONS \ - ".exit.data$", ".cpuexit.data$", ".memexit.data$" + ".exit.data$", ".memexit.data$" #define ALL_INIT_TEXT_SECTIONS \ - ".init.text$", ".cpuinit.text$", ".meminit.text$" + ".init.text$", ".meminit.text$" #define ALL_EXIT_TEXT_SECTIONS \ - ".exit.text$", ".cpuexit.text$", ".memexit.text$" + ".exit.text$", ".memexit.text$" #define ALL_PCI_INIT_SECTIONS \ ".pci_fixup_early$", ".pci_fixup_header$", ".pci_fixup_final$", \ ".pci_fixup_enable$", ".pci_fixup_resume$", \ ".pci_fixup_resume_early$", ".pci_fixup_suspend$" -#define ALL_XXXINIT_SECTIONS CPU_INIT_SECTIONS, MEM_INIT_SECTIONS -#define ALL_XXXEXIT_SECTIONS CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS +#define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS +#define ALL_XXXEXIT_SECTIONS MEM_EXIT_SECTIONS #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS #define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS @@ -887,11 +886,9 @@ static void check_section(const char *modname, struct elf_info *elf, #define TEXT_SECTIONS ".text$", ".text.unlikely$" #define INIT_SECTIONS ".init.*" -#define CPU_INIT_SECTIONS ".cpuinit.*" #define MEM_INIT_SECTIONS ".meminit.*" #define EXIT_SECTIONS ".exit.*" -#define CPU_EXIT_SECTIONS ".cpuexit.*" #define MEM_EXIT_SECTIONS ".memexit.*" /* init data sections */ @@ -979,48 +976,20 @@ const struct sectioncheck sectioncheck[] = { .mismatch = DATA_TO_ANY_EXIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, }, -/* Do not reference init code/data from cpuinit/meminit code/data */ +/* Do not reference init code/data from meminit code/data */ { .fromsec = { ALL_XXXINIT_SECTIONS, NULL }, .tosec = { INIT_SECTIONS, NULL }, .mismatch = XXXINIT_TO_SOME_INIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, }, -/* Do not reference cpuinit code/data from meminit code/data */ -{ - .fromsec = { MEM_INIT_SECTIONS, NULL }, - .tosec = { CPU_INIT_SECTIONS, NULL }, - .mismatch = XXXINIT_TO_SOME_INIT, - .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, -}, -/* Do not reference meminit code/data from cpuinit code/data */ -{ - .fromsec = { CPU_INIT_SECTIONS, NULL }, - .tosec = { MEM_INIT_SECTIONS, NULL }, - .mismatch = XXXINIT_TO_SOME_INIT, - .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, -}, -/* Do not reference exit code/data from cpuexit/memexit code/data */ +/* Do not reference exit code/data from memexit code/data */ { .fromsec = { ALL_XXXEXIT_SECTIONS, NULL }, .tosec = { EXIT_SECTIONS, NULL }, .mismatch = XXXEXIT_TO_SOME_EXIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, }, -/* Do not reference cpuexit code/data from memexit code/data */ -{ - .fromsec = { MEM_EXIT_SECTIONS, NULL }, - .tosec = { CPU_EXIT_SECTIONS, NULL }, - .mismatch = XXXEXIT_TO_SOME_EXIT, - .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, -}, -/* Do not reference memexit code/data from cpuexit code/data */ -{ - .fromsec = { CPU_EXIT_SECTIONS, NULL }, - .tosec = { MEM_EXIT_SECTIONS, NULL }, - .mismatch = XXXEXIT_TO_SOME_EXIT, - .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, -}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, @@ -1089,8 +1058,6 @@ static const struct sectioncheck *section_mismatch( * Pattern 2: * Many drivers utilise a *driver container with references to * add, remove, probe functions etc. - * These functions may often be marked __cpuinit and we do not want to - * warn here. * the pattern is identified by: * tosec = init or exit section * fromsec = data section @@ -1249,7 +1216,6 @@ static Elf_Sym *find_elf_symbol2(struct elf_info *elf, Elf_Addr addr, /* * Convert a section name to the function/data attribute * .init.text => __init - * .cpuinit.data => __cpudata * .memexitconst => __memconst * etc. * diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 0a63658065f0..4cb14cae3791 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -6,6 +6,7 @@ TARGETS += memory-hotplug TARGETS += mqueue TARGETS += net TARGETS += ptrace +TARGETS += timers TARGETS += vm all: diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile new file mode 100644 index 000000000000..eb2859f4ad21 --- /dev/null +++ b/tools/testing/selftests/timers/Makefile @@ -0,0 +1,8 @@ +all: + gcc posix_timers.c -o posix_timers -lrt + +run_tests: all + ./posix_timers + +clean: + rm -f ./posix_timers diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c new file mode 100644 index 000000000000..4fa655d68a81 --- /dev/null +++ b/tools/testing/selftests/timers/posix_timers.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2013 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> + * + * Licensed under the terms of the GNU GPL License version 2 + * + * Selftests for a few posix timers interface. + * + * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com> + */ + +#include <sys/time.h> +#include <stdio.h> +#include <signal.h> +#include <unistd.h> +#include <time.h> +#include <pthread.h> + +#define DELAY 2 +#define USECS_PER_SEC 1000000 + +static volatile int done; + +/* Busy loop in userspace to elapse ITIMER_VIRTUAL */ +static void user_loop(void) +{ + while (!done); +} + +/* + * Try to spend as much time as possible in kernelspace + * to elapse ITIMER_PROF. + */ +static void kernel_loop(void) +{ + void *addr = sbrk(0); + + while (!done) { + brk(addr + 4096); + brk(addr); + } +} + +/* + * Sleep until ITIMER_REAL expiration. + */ +static void idle_loop(void) +{ + pause(); +} + +static void sig_handler(int nr) +{ + done = 1; +} + +/* + * Check the expected timer expiration matches the GTOD elapsed delta since + * we armed the timer. Keep a 0.5 sec error margin due to various jitter. + */ +static int check_diff(struct timeval start, struct timeval end) +{ + long long diff; + + diff = end.tv_usec - start.tv_usec; + diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; + + if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + printf("Diff too high: %lld..", diff); + return -1; + } + + return 0; +} + +static int check_itimer(int which) +{ + int err; + struct timeval start, end; + struct itimerval val = { + .it_value.tv_sec = DELAY, + }; + + printf("Check itimer "); + + if (which == ITIMER_VIRTUAL) + printf("virtual... "); + else if (which == ITIMER_PROF) + printf("prof... "); + else if (which == ITIMER_REAL) + printf("real... "); + + fflush(stdout); + + done = 0; + + if (which == ITIMER_VIRTUAL) + signal(SIGVTALRM, sig_handler); + else if (which == ITIMER_PROF) + signal(SIGPROF, sig_handler); + else if (which == ITIMER_REAL) + signal(SIGALRM, sig_handler); + + err = gettimeofday(&start, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + err = setitimer(which, &val, NULL); + if (err < 0) { + perror("Can't set timer\n"); + return -1; + } + + if (which == ITIMER_VIRTUAL) + user_loop(); + else if (which == ITIMER_PROF) + kernel_loop(); + else if (which == ITIMER_REAL) + idle_loop(); + + gettimeofday(&end, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + if (!check_diff(start, end)) + printf("[OK]\n"); + else + printf("[FAIL]\n"); + + return 0; +} + +static int check_timer_create(int which) +{ + int err; + timer_t id; + struct timeval start, end; + struct itimerspec val = { + .it_value.tv_sec = DELAY, + }; + + printf("Check timer_create() "); + if (which == CLOCK_THREAD_CPUTIME_ID) { + printf("per thread... "); + } else if (which == CLOCK_PROCESS_CPUTIME_ID) { + printf("per process... "); + } + fflush(stdout); + + done = 0; + timer_create(which, NULL, &id); + if (err < 0) { + perror("Can't create timer\n"); + return -1; + } + signal(SIGALRM, sig_handler); + + err = gettimeofday(&start, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + err = timer_settime(id, 0, &val, NULL); + if (err < 0) { + perror("Can't set timer\n"); + return -1; + } + + user_loop(); + + gettimeofday(&end, NULL); + if (err < 0) { + perror("Can't call gettimeofday()\n"); + return -1; + } + + if (!check_diff(start, end)) + printf("[OK]\n"); + else + printf("[FAIL]\n"); + + return 0; +} + +int main(int argc, char **argv) +{ + int err; + + printf("Testing posix timers. False negative may happen on CPU execution \n"); + printf("based timers if other threads run on the CPU...\n"); + + if (check_itimer(ITIMER_VIRTUAL) < 0) + return -1; + + if (check_itimer(ITIMER_PROF) < 0) + return -1; + + if (check_itimer(ITIMER_REAL) < 0) + return -1; + + if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0) + return -1; + + /* + * It's unfortunately hard to reliably test a timer expiration + * on parallel multithread cputime. We could arm it to expire + * on DELAY * nr_threads, with nr_threads busy looping, then wait + * the normal DELAY since the time is elapsing nr_threads faster. + * But for that we need to ensure we have real physical free CPUs + * to ensure true parallelism. So test only one thread until we + * find a better solution. + */ + if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0) + return -1; + + return 0; +} |