diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-29 18:05:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-29 18:05:31 -0700 |
commit | 7d36014b972a3833b883a7ef41e6bd3b0d187850 (patch) | |
tree | 5c828b0de1cd357288135cc04461d31386b89c26 | |
parent | 442a9ffabb21f175027e93e72ea05159818271a6 (diff) | |
parent | ecb41a77411358d385e3fde5b4e98a5f3d9cfdd5 (diff) | |
download | lwn-7d36014b972a3833b883a7ef41e6bd3b0d187850.tar.gz lwn-7d36014b972a3833b883a7ef41e6bd3b0d187850.zip |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge patches through Andrew Morton:
"180 patches - err 181 - listed below:
- most of MM. I held back the (large) "memcg: add hugetlb extension"
series because a bunfight has recently broken out.
- leds. After this, Bryan Wu will be handling drivers/leds/
- backlight
- lib/
- rtc"
* emailed from Andrew Morton <akpm@linux-foundation.org>: (181 patches)
drivers/rtc/rtc-s3c.c: fix compiler warning
drivers/rtc/rtc-tegra.c: clean up probe/remove routines
drivers/rtc/rtc-pl031.c: remove RTC timer interrupt handling
drivers/rtc/rtc-lpc32xx.c: add device tree support
drivers/rtc/rtc-m41t93.c: don't let get_time() reset M41T93_FLAG_OF
rtc: ds1307: add trickle charger support
rtc: ds1307: remove superfluous initialization
rtc: rename CONFIG_RTC_MXC to CONFIG_RTC_DRV_MXC
drivers/rtc/Kconfig: place RTC_DRV_IMXDI and RTC_MXC under "on-CPU RTC drivers"
drivers/rtc/rtc-pcf8563.c: add RTC_VL_READ/RTC_VL_CLR ioctl feature
rtc: add ioctl to get/clear battery low voltage status
drivers/rtc/rtc-ep93xx.c: convert to use module_platform_driver()
rtc/spear: add Device Tree probing capability
lib/vsprintf.c: "%#o",0 becomes '0' instead of '00'
radix-tree: fix preload vector size
spinlock_debug: print kallsyms name for lock
vsprintf: fix %ps on non symbols when using kallsyms
lib/bitmap.c: fix documentation for scnprintf() functions
lib/string_helpers.c: make arrays static
lib/test-kstrtox.c: mark const init data with __initconst instead of __initdata
...
161 files changed, 4693 insertions, 2548 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 new file mode 100644 index 000000000000..77cf7ac949af --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 @@ -0,0 +1,48 @@ +What: /sys/class/backlight/<backlight>/als_channel +Date: May 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Get the ALS output channel used as input in + ALS-current-control mode (0, 1), where + + 0 - out_current0 (backlight 0) + 1 - out_current1 (backlight 1) + +What: /sys/class/backlight/<backlight>/als_en +Date: May 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Enable ALS-current-control mode (0, 1). + +What: /sys/class/backlight/<backlight>/id +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Get the id of this backlight (0, 1). + +What: /sys/class/backlight/<backlight>/linear +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Set the brightness-mapping mode (0, 1), where + + 0 - exponential mode + 1 - linear mode + +What: /sys/class/backlight/<backlight>/pwm +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Set the PWM-input control mask (5 bits), where + + bit 5 - PWM-input enabled in Zone 4 + bit 4 - PWM-input enabled in Zone 3 + bit 3 - PWM-input enabled in Zone 2 + bit 2 - PWM-input enabled in Zone 1 + bit 1 - PWM-input enabled in Zone 0 + bit 0 - PWM-input enabled diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 new file mode 100644 index 000000000000..620ebb3b9baa --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 @@ -0,0 +1,65 @@ +What: /sys/class/leds/<led>/als_channel +Date: May 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Set the ALS output channel to use as input in + ALS-current-control mode (1, 2), where + + 1 - out_current1 + 2 - out_current2 + +What: /sys/class/leds/<led>/als_en +Date: May 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Enable ALS-current-control mode (0, 1). + +What: /sys/class/leds/<led>/falltime +What: /sys/class/leds/<led>/risetime +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Set the pattern generator fall and rise times (0..7), where + + 0 - 2048 us + 1 - 262 ms + 2 - 524 ms + 3 - 1.049 s + 4 - 2.097 s + 5 - 4.194 s + 6 - 8.389 s + 7 - 16.78 s + +What: /sys/class/leds/<led>/id +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Get the id of this led (0..3). + +What: /sys/class/leds/<led>/linear +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Set the brightness-mapping mode (0, 1), where + + 0 - exponential mode + 1 - linear mode + +What: /sys/class/leds/<led>/pwm +Date: April 2012 +KernelVersion: 3.5 +Contact: Johan Hovold <jhovold@gmail.com> +Description: + Set the PWM-input control mask (5 bits), where + + bit 5 - PWM-input enabled in Zone 4 + bit 4 - PWM-input enabled in Zone 3 + bit 3 - PWM-input enabled in Zone 2 + bit 2 - PWM-input enabled in Zone 1 + bit 1 - PWM-input enabled in Zone 0 + bit 0 - PWM-input enabled diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 9b1067afb224..dd88540bb995 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -184,12 +184,14 @@ behind this approach is that a cgroup that aggressively uses a shared page will eventually get charged for it (once it is uncharged from the cgroup that brought it in -- this will happen on memory pressure). +But see section 8.2: when moving a task to another cgroup, its pages may +be recharged to the new cgroup, if move_charge_at_immigrate has been chosen. + Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. - 2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) Swap Extension allows you to record charge for swap. A swapped-in page is @@ -374,14 +376,15 @@ cgroup might have some charge associated with it, even though all tasks have migrated away from it. (because we charge against pages, not against tasks.) -Such charges are freed or moved to their parent. At moving, both of RSS -and CACHES are moved to parent. -rmdir() may return -EBUSY if freeing/moving fails. See 5.1 also. +We move the stats to root (if use_hierarchy==0) or parent (if +use_hierarchy==1), and no change on the charge except uncharging +from the child. Charges recorded in swap information is not updated at removal of cgroup. Recorded information is discarded and a cgroup which uses swap (swapcache) will be charged as a new owner of it. +About use_hierarchy, see Section 6. 5. Misc. interfaces. @@ -394,13 +397,15 @@ will be charged as a new owner of it. Almost all pages tracked by this memory cgroup will be unmapped and freed. Some pages cannot be freed because they are locked or in-use. Such pages are - moved to parent and this cgroup will be empty. This may return -EBUSY if - VM is too busy to free/move all pages immediately. + moved to parent(if use_hierarchy==1) or root (if use_hierarchy==0) and this + cgroup will be empty. Typical use case of this interface is that calling this before rmdir(). Because rmdir() moves all pages to parent, some out-of-use page caches can be moved to the parent. If you want to avoid that, force_empty will be useful. + About use_hierarchy, see Section 6. + 5.2 stat file memory.stat file includes following statistics @@ -430,17 +435,10 @@ hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to hierarchy under which memory cgroup is. -total_cache - sum of all children's "cache" -total_rss - sum of all children's "rss" -total_mapped_file - sum of all children's "cache" -total_pgpgin - sum of all children's "pgpgin" -total_pgpgout - sum of all children's "pgpgout" -total_swap - sum of all children's "swap" -total_inactive_anon - sum of all children's "inactive_anon" -total_active_anon - sum of all children's "active_anon" -total_inactive_file - sum of all children's "inactive_file" -total_active_file - sum of all children's "active_file" -total_unevictable - sum of all children's "unevictable" +total_<counter> - # hierarchical version of <counter>, which in + addition to the cgroup's own value includes the + sum of all hierarchical children's values of + <counter>, i.e. total_cache # The following additional stats are dependent on CONFIG_DEBUG_VM. @@ -622,8 +620,7 @@ memory cgroup. bit | what type of charges would be moved ? -----+------------------------------------------------------------------------ 0 | A charge of an anonymous page(or swap of it) used by the target task. - | Those pages and swaps must be used only by the target task. You must - | enable Swap Extension(see 2.4) to enable move of swap charges. + | You must enable Swap Extension(see 2.4) to enable move of swap charges. -----+------------------------------------------------------------------------ 1 | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory) | and swaps of tmpfs file) mmapped by the target task. Unlike the case of @@ -636,8 +633,6 @@ memory cgroup. 8.3 TODO -- Implement madvise(2) to let users decide the vma to be moved or not to be - moved. - All of moving charge operations are done under cgroup_mutex. It's not good behavior to hold the mutex too long, so we may need some trick. diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index f3c4ec3626a2..0c4a344e78fa 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt @@ -92,6 +92,14 @@ to work with it. The _locked routines imply that the res_counter->lock is taken. + f. void res_counter_uncharge_until + (struct res_counter *rc, struct res_counter *top, + unsinged long val) + + Almost same as res_cunter_uncharge() but propagation of uncharge + stops when rc == top. This is useful when kill a res_coutner in + child cgroup. + 2.1 Other accounting routines There are more routines that may help you with common needs, like diff --git a/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt b/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt new file mode 100644 index 000000000000..a87a1e9bc060 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt @@ -0,0 +1,15 @@ +* NXP LPC32xx SoC Real Time Clock controller + +Required properties: +- compatible: must be "nxp,lpc3220-rtc" +- reg: physical base address of the controller and length of memory mapped + region. +- interrupts: The RTC interrupt + +Example: + + rtc@40024000 { + compatible = "nxp,lpc3220-rtc"; + reg = <0x40024000 0x1000>; + interrupts = <52 0>; + }; diff --git a/Documentation/devicetree/bindings/rtc/spear-rtc.txt b/Documentation/devicetree/bindings/rtc/spear-rtc.txt new file mode 100644 index 000000000000..ca67ac62108e --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/spear-rtc.txt @@ -0,0 +1,17 @@ +* SPEAr RTC + +Required properties: +- compatible : "st,spear600-rtc" +- reg : Address range of the rtc registers +- interrupt-parent: Should be the phandle for the interrupt controller + that services interrupts for this device +- interrupt: Should contain the rtc interrupt number + +Example: + + rtc@fc000000 { + compatible = "st,spear600-rtc"; + reg = <0xfc000000 0x1000>; + interrupt-parent = <&vic1>; + interrupts = <12>; + }; diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4fca82e5276e..d449e632e6a0 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -60,7 +60,6 @@ ata *); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); - void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); locking rules: @@ -87,7 +86,6 @@ setxattr: yes getxattr: no listxattr: no removexattr: yes -truncate_range: yes fiemap: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ef088e55ab2e..912af6ce5626 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -743,6 +743,7 @@ Committed_AS: 100056 kB VmallocTotal: 112216 kB VmallocUsed: 428 kB VmallocChunk: 111088 kB +AnonHugePages: 49152 kB MemTotal: Total usable ram (i.e. physical ram minus a few reserved bits and the kernel binary code) @@ -776,6 +777,7 @@ VmallocChunk: 111088 kB Dirty: Memory which is waiting to get written back to the disk Writeback: Memory which is actively being written back to the disk AnonPages: Non-file backed pages mapped into userspace page tables +AnonHugePages: Non-file backed huge pages mapped into userspace page tables Mapped: files which have been mmaped, such as libraries Slab: in-kernel data structures cache SReclaimable: Part of Slab, that might be reclaimed, such as caches diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 0d0492028082..ef19f91a0f12 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -363,7 +363,6 @@ struct inode_operations { ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); - void (*truncate_range)(struct inode *, loff_t, loff_t); }; Again, all methods are called without any locks being held, unless @@ -472,9 +471,6 @@ otherwise noted. removexattr: called by the VFS to remove an extended attribute from a file. This method is called by removexattr(2) system call. - truncate_range: a method provided by the underlying filesystem to truncate a - range of blocks , i.e. punch a hole somewhere in a file. - The Address Space Object ======================== @@ -760,7 +756,7 @@ struct file_operations ---------------------- This describes how the VFS can manipulate an open file. As of kernel -2.6.22, the following members are defined: +3.5, the following members are defined: struct file_operations { struct module *owner; @@ -790,6 +786,8 @@ struct file_operations { int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long arg, struct file_lock **); + long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len); }; Again, all methods are called without any locks being held, unless @@ -858,6 +856,11 @@ otherwise noted. splice_read: called by the VFS to splice data from file to a pipe. This method is used by the splice(2) system call + setlease: called by the VFS to set or release a file lock lease. + setlease has the file_lock_lock held and must not sleep. + + fallocate: called by the VFS to preallocate blocks or punch a hole. + Note that the file operations are implemented by the specific filesystem in which the inode resides. When opening a device node (character or block special) most filesystems will call special diff --git a/Documentation/leds/ledtrig-transient.txt b/Documentation/leds/ledtrig-transient.txt new file mode 100644 index 000000000000..3bd38b487df1 --- /dev/null +++ b/Documentation/leds/ledtrig-transient.txt @@ -0,0 +1,152 @@ +LED Transient Trigger +===================== + +The leds timer trigger does not currently have an interface to activate +a one shot timer. The current support allows for setting two timers, one for +specifying how long a state to be on, and the second for how long the state +to be off. The delay_on value specifies the time period an LED should stay +in on state, followed by a delay_off value that specifies how long the LED +should stay in off state. The on and off cycle repeats until the trigger +gets deactivated. There is no provision for one time activation to implement +features that require an on or off state to be held just once and then stay in +the original state forever. + +Without one shot timer interface, user space can still use timer trigger to +set a timer to hold a state, however when user space application crashes or +goes away without deactivating the timer, the hardware will be left in that +state permanently. + +As a specific example of this use-case, let's look at vibrate feature on +phones. Vibrate function on phones is implemented using PWM pins on SoC or +PMIC. There is a need to activate one shot timer to control the vibrate +feature, to prevent user space crashes leaving the phone in vibrate mode +permanently causing the battery to drain. + +Transient trigger addresses the need for one shot timer activation. The +transient trigger can be enabled and disabled just like the other leds +triggers. + +When an led class device driver registers itself, it can specify all leds +triggers it supports and a default trigger. During registration, activation +routine for the default trigger gets called. During registration of an led +class device, the LED state does not change. + +When the driver unregisters, deactivation routine for the currently active +trigger will be called, and LED state is changed to LED_OFF. + +Driver suspend changes the LED state to LED_OFF and resume doesn't change +the state. Please note that there is no explicit interaction between the +suspend and resume actions and the currently enabled trigger. LED state +changes are suspended while the driver is in suspend state. Any timers +that are active at the time driver gets suspended, continue to run, without +being able to actually change the LED state. Once driver is resumed, triggers +start functioning again. + +LED state changes are controlled using brightness which is a common led +class device property. When brightness is set to 0 from user space via +echo 0 > brightness, it will result in deactivating the current trigger. + +Transient trigger uses standard register and unregister interfaces. During +trigger registration, for each led class device that specifies this trigger +as its default trigger, trigger activation routine will get called. During +registration, the LED state does not change, unless there is another trigger +active, in which case LED state changes to LED_OFF. + +During trigger unregistration, LED state gets changed to LED_OFF. + +Transient trigger activation routine doesn't change the LED state. It +creates its properties and does its initialization. Transient trigger +deactivation routine, will cancel any timer that is active before it cleans +up and removes the properties it created. It will restore the LED state to +non-transient state. When driver gets suspended, irrespective of the transient +state, the LED state changes to LED_OFF. + +Transient trigger can be enabled and disabled from user space on led class +devices, that support this trigger as shown below: + +echo transient > trigger +echo none > trigger + +NOTE: Add a new property trigger state to control the state. + +This trigger exports three properties, activate, state, and duration. When +transient trigger is activated these properties are set to default values. + +- duration allows setting timer value in msecs. The initial value is 0. +- activate allows activating and deactivating the timer specified by + duration as needed. The initial and default value is 0. This will allow + duration to be set after trigger activation. +- state allows user to specify a transient state to be held for the specified + duration. + + activate - one shot timer activate mechanism. + 1 when activated, 0 when deactivated. + default value is zero when transient trigger is enabled, + to allow duration to be set. + + activate state indicates a timer with a value of specified + duration running. + deactivated state indicates that there is no active timer + running. + + duration - one shot timer value. When activate is set, duration value + is used to start a timer that runs once. This value doesn't + get changed by the trigger unless user does a set via + echo new_value > duration + + state - transient state to be held. It has two values 0 or 1. 0 maps + to LED_OFF and 1 maps to LED_FULL. The specified state is + held for the duration of the one shot timer and then the + state gets changed to the non-transient state which is the + inverse of transient state. + If state = LED_FULL, when the timer runs out the state will + go back to LED_OFF. + If state = LED_OFF, when the timer runs out the state will + go back to LED_FULL. + Please note that current LED state is not checked prior to + changing the state to the specified state. + Driver could map these values to inverted depending on the + default states it defines for the LED in its brightness_set() + interface which is called from the led brightness_set() + interfaces to control the LED state. + +When timer expires activate goes back to deactivated state, duration is left +at the set value to be used when activate is set at a future time. This will +allow user app to set the time once and activate it to run it once for the +specified value as needed. When timer expires, state is restored to the +non-transient state which is the inverse of the transient state. + + echo 1 > activate - starts timer = duration when duration is not 0. + echo 0 > activate - cancels currently running timer. + echo n > duration - stores timer value to be used upon next + activate. Currently active timer if + any, continues to run for the specified time. + echo 0 > duration - stores timer value to be used upon next + activate. Currently active timer if any, + continues to run for the specified time. + echo 1 > state - stores desired transient state LED_FULL to be + held for the specified duration. + echo 0 > state - stores desired transient state LED_OFF to be + held for the specified duration. + +What is not supported: +====================== +- Timer activation is one shot and extending and/or shortening the timer + is not supported. + +Example use-case 1: + echo transient > trigger + echo n > duration + echo 1 > state +repeat the following step as needed: + echo 1 > activate - start timer = duration to run once + echo 1 > activate - start timer = duration to run once + echo none > trigger + +This trigger is intended to be used for for the following example use cases: + - Control of vibrate (phones, tablets etc.) hardware by user space app. + - Use of LED by user space app as activity indicator. + - Use of LED by user space app as a kind of watchdog indicator -- as + long as the app is alive, it can keep the LED illuminated, if it dies + the LED will be extinguished automatically. + - Use by any user space app that needs a transient GPIO output. diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 29bdf62aac09..f734bb2a78dc 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -166,6 +166,68 @@ behavior. So to make them effective you need to restart any application that could have been using hugepages. This also applies to the regions registered in khugepaged. +== Monitoring usage == + +The number of transparent huge pages currently used by the system is +available by reading the AnonHugePages field in /proc/meminfo. To +identify what applications are using transparent huge pages, it is +necessary to read /proc/PID/smaps and count the AnonHugePages fields +for each mapping. Note that reading the smaps file is expensive and +reading it frequently will incur overhead. + +There are a number of counters in /proc/vmstat that may be used to +monitor how successfully the system is providing huge pages for use. + +thp_fault_alloc is incremented every time a huge page is successfully + allocated to handle a page fault. This applies to both the + first time a page is faulted and for COW faults. + +thp_collapse_alloc is incremented by khugepaged when it has found + a range of pages to collapse into one huge page and has + successfully allocated a new huge page to store the data. + +thp_fault_fallback is incremented if a page fault fails to allocate + a huge page and instead falls back to using small pages. + +thp_collapse_alloc_failed is incremented if khugepaged found a range + of pages that should be collapsed into one huge page but failed + the allocation. + +thp_split is incremented every time a huge page is split into base + pages. This can happen for a variety of reasons but a common + reason is that a huge page is old and is being reclaimed. + +As the system ages, allocating huge pages may be expensive as the +system uses memory compaction to copy data around memory to free a +huge page for use. There are some counters in /proc/vmstat to help +monitor this overhead. + +compact_stall is incremented every time a process stalls to run + memory compaction so that a huge page is free for use. + +compact_success is incremented if the system compacted memory and + freed a huge page for use. + +compact_fail is incremented if the system tries to compact memory + but failed. + +compact_pages_moved is incremented each time a page is moved. If + this value is increasing rapidly, it implies that the system + is copying a lot of data to satisfy the huge page allocation. + It is possible that the cost of copying exceeds any savings + from reduced TLB misses. + +compact_pagemigrate_failed is incremented when the underlying mechanism + for moving a page failed. + +compact_blocks_moved is incremented each time memory compaction examines + a huge page aligned range of pages. + +It is possible to establish how long the stalls were using the function +tracer to record how long was spent in __alloc_pages_nodemask and +using the mm_page_alloc tracepoint to identify which allocations were +for huge pages. + == get_user_pages and follow_page == get_user_pages and follow_page if run on a hugepage, will return the diff --git a/MAINTAINERS b/MAINTAINERS index cc710d2ef009..a246490c95eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3232,10 +3232,8 @@ F: include/linux/clockchips.h F: include/linux/hrtimer.h HIGH-SPEED SCC DRIVER FOR AX.25 -M: Klaus Kudielka <klaus.kudielka@ieee.org> L: linux-hams@vger.kernel.org -W: http://www.nt.tuwien.ac.at/~kkudielk/Linux/ -S: Maintained +S: Orphan F: drivers/net/hamradio/dmascc.c F: drivers/net/hamradio/scc.c @@ -4511,12 +4509,6 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/mmc/host/imxmmc.* -MOUSE AND MISC DEVICES [GENERAL] -M: Alessandro Rubini <rubini@ipvvis.unipv.it> -S: Maintained -F: drivers/input/mouse/ -F: include/linux/gpio_mouse.h - MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD M: Jiri Slaby <jirislaby@gmail.com> S: Maintained diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index ebda45bd5763..e05a2f1665a7 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -173,7 +173,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PCF8563=y CONFIG_RTC_DRV_IMXDI=y -CONFIG_RTC_MXC=y +CONFIG_RTC_DRV_MXC=y CONFIG_DMADEVICES=y CONFIG_IMX_SDMA=y CONFIG_IMX_DMA=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 12617f7296e6..b1d3675df72c 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -178,7 +178,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y -CONFIG_RTC_MXC=y +CONFIG_RTC_DRV_MXC=y CONFIG_DMADEVICES=y CONFIG_IMX_SDMA=y CONFIG_EXT2_FS=y diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 22d34d64cc81..bb344650a14f 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -40,6 +40,7 @@ config CRIS bool default y select HAVE_IDE + select GENERIC_ATOMIC64 select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select GENERIC_IOMAP diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index effff47a3c82..43876f16caf1 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) ptep->pte_low = pte.pte_low; } +#define pmd_read_atomic pmd_read_atomic +/* + * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by gcc. Problem is, in certain places + * where pte_offset_map_lock is called, concurrent page faults are + * allowed, if the mmap_sem is hold for reading. An example is mincore + * vs page faults vs MADV_DONTNEED. On the page fault side + * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen + * because gcc will not read the 64bit of the pmd atomically. To fix + * this all places running pmd_offset_map_lock() while holding the + * mmap_sem in read mode, shall read the pmdp pointer using this + * function to know if the pmd is null nor not, and in turn to know if + * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * operations. + * + * Without THP if the mmap_sem is hold for reading, the + * pmd can only transition from null to not null while pmd_read_atomic runs. + * So there's no need of literally reading it atomically. + * + * With THP if the mmap_sem is hold for reading, the pmd can become + * THP or null or point to a pte (and in turn become "stable") at any + * time under pmd_read_atomic, so it's mandatory to read it atomically + * with cmpxchg8b. + */ +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + pmdval_t ret; + u32 *tmp = (u32 *)pmdp; + + ret = (pmdval_t) (*tmp); + if (ret) { + /* + * If the low part is null, we must not read the high part + * or we can end up with a partial pmd. + */ + smp_rmb(); + ret |= ((pmdval_t)*(tmp + 1)) << 32; + } + + return (pmd_t) { ret }; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + return (pmd_t) { atomic64_read((atomic64_t *)pmdp) }; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 62d61e9976eb..41857970517f 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -113,7 +113,9 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, int x = e820x->nr_map; if (x >= ARRAY_SIZE(e820x->map)) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", + (unsigned long long) start, + (unsigned long long) (start + size - 1)); return; } @@ -133,19 +135,19 @@ static void __init e820_print_type(u32 type) switch (type) { case E820_RAM: case E820_RESERVED_KERN: - printk(KERN_CONT "(usable)"); + printk(KERN_CONT "usable"); break; case E820_RESERVED: - printk(KERN_CONT "(reserved)"); + printk(KERN_CONT "reserved"); break; case E820_ACPI: - printk(KERN_CONT "(ACPI data)"); + printk(KERN_CONT "ACPI data"); break; case E820_NVS: - printk(KERN_CONT "(ACPI NVS)"); + printk(KERN_CONT "ACPI NVS"); break; case E820_UNUSABLE: - printk(KERN_CONT "(unusable)"); + printk(KERN_CONT "unusable"); break; default: printk(KERN_CONT "type %u", type); @@ -158,10 +160,10 @@ void __init e820_print_map(char *who) int i; for (i = 0; i < e820.nr_map; i++) { - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, + printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who, (unsigned long long) e820.map[i].addr, (unsigned long long) - (e820.map[i].addr + e820.map[i].size)); + (e820.map[i].addr + e820.map[i].size - 1)); e820_print_type(e820.map[i].type); printk(KERN_CONT "\n"); } @@ -428,9 +430,8 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, size = ULLONG_MAX - start; end = start + size; - printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", - (unsigned long long) start, - (unsigned long long) end); + printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", + (unsigned long long) start, (unsigned long long) (end - 1)); e820_print_type(old_type); printk(KERN_CONT " ==> "); e820_print_type(new_type); @@ -509,9 +510,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, size = ULLONG_MAX - start; end = start + size; - printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", - (unsigned long long) start, - (unsigned long long) end); + printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", + (unsigned long long) start, (unsigned long long) (end - 1)); if (checktype) e820_print_type(old_type); printk(KERN_CONT "\n"); @@ -567,7 +567,7 @@ void __init update_e820(void) if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) return; e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); + printk(KERN_INFO "e820: modified physical RAM map:\n"); e820_print_map("modified"); } static void __init update_e820_saved(void) @@ -637,8 +637,8 @@ __init void e820_setup_gap(void) if (!found) { gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR - "PCI: Warning: Cannot find a gap in the 32bit address range\n" - "PCI: Unassigned devices with 32bit resource registers may break!\n"); + "e820: cannot find a gap in the 32bit address range\n" + "e820: PCI devices with unassigned 32bit BARs may break!\n"); } #endif @@ -648,8 +648,8 @@ __init void e820_setup_gap(void) pci_mem_start = gapstart; printk(KERN_INFO - "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); + "e820: [mem %#010lx-%#010lx] available for PCI devices\n", + gapstart, gapstart + gapsize - 1); } /** @@ -667,7 +667,7 @@ void __init parse_e820_ext(struct setup_data *sdata) extmap = (struct e820entry *)(sdata->data); __append_e820_map(extmap, entries); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - printk(KERN_INFO "extended physical RAM map:\n"); + printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); } @@ -734,7 +734,7 @@ u64 __init early_reserve_e820(u64 size, u64 align) addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); if (addr) { e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); - printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); + printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n"); update_e820_saved(); } @@ -784,7 +784,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) if (last_pfn > max_arch_pfn) last_pfn = max_arch_pfn; - printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", + printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n", last_pfn, max_arch_pfn); return last_pfn; } @@ -888,7 +888,7 @@ void __init finish_e820_parsing(void) early_panic("Invalid user supplied memory map"); e820.nr_map = nr; - printk(KERN_INFO "user-defined physical RAM map:\n"); + printk(KERN_INFO "e820: user-defined physical RAM map:\n"); e820_print_map("user"); } } @@ -996,8 +996,9 @@ void __init e820_reserve_resources_late(void) end = MAX_RESOURCE_SIZE; if (start >= end) continue; - printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", - start, end); + printk(KERN_DEBUG + "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", + start, end); reserve_region_with_split(&iomem_resource, start, end, "RAM buffer"); } @@ -1047,7 +1048,7 @@ void __init setup_memory_map(void) who = x86_init.resources.memory_setup(); memcpy(&e820_saved, &e820, sizeof(struct e820map)); - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n"); e820_print_map(who); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b02d4dd6b8a3..fbca2e6223bf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -568,8 +568,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) struct mpf_intel *mpf; unsigned long mem; - apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", - bp, length); + apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n", + base, base + length - 1); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { @@ -584,8 +584,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) #endif mpf_found = mpf; - printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", - mpf, (u64)virt_to_phys(mpf)); + printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", + (unsigned long long) virt_to_phys(mpf), + (unsigned long long) virt_to_phys(mpf) + + sizeof(*mpf) - 1, mpf); mem = virt_to_phys(mpf); memblock_reserve(mem, sizeof(*mpf)); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f2afee6a19c1..982e44f960db 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -334,8 +334,8 @@ static void __init relocate_initrd(void) memblock_reserve(ramdisk_here, area_size); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; - printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", - ramdisk_here, ramdisk_here + ramdisk_size); + printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", + ramdisk_here, ramdisk_here + ramdisk_size - 1); q = (char *)initrd_start; @@ -366,8 +366,8 @@ static void __init relocate_initrd(void) /* high pages is not converted by early_res_to_bootmem */ ramdisk_image = boot_params.hdr.ramdisk_image; ramdisk_size = boot_params.hdr.ramdisk_size; - printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" - " %08llx - %08llx\n", + printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" + " [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_image + ramdisk_size - 1, ramdisk_here, ramdisk_here + ramdisk_size - 1); } @@ -392,8 +392,8 @@ static void __init reserve_initrd(void) ramdisk_size, end_of_lowmem>>1); } - printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, - ramdisk_end); + printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, + ramdisk_end - 1); if (ramdisk_end <= end_of_lowmem) { @@ -906,8 +906,8 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif - printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", - max_pfn_mapped<<PAGE_SHIFT); + printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", + (max_pfn_mapped<<PAGE_SHIFT) - 1); setup_trampolines(); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 319b6f2fb8b9..97141c26a13a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -84,8 +84,9 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en pgt_buf_end = pgt_buf_start; pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); + printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", + end - 1, pgt_buf_start << PAGE_SHIFT, + (pgt_buf_top << PAGE_SHIFT) - 1); } void __init native_pagetable_reserve(u64 start, u64 end) @@ -132,7 +133,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, int nr_range, i; int use_pse, use_gbpages; - printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); + printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", + start, end - 1); #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) /* @@ -251,8 +253,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, } for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, + printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", + mr[i].start, mr[i].end - 1, (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); @@ -350,8 +352,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) * create a kernel page fault: */ #ifdef CONFIG_DEBUG_PAGEALLOC - printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, end); + printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", + begin, end - 1); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else /* diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 19d3fa08b119..2d125be1bae9 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -141,8 +141,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end, /* whine about and ignore invalid blks */ if (start > end || nid < 0 || nid >= MAX_NUMNODES) { - pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", - nid, start, end); + pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); return 0; } @@ -210,8 +210,8 @@ static void __init setup_node_data(int nid, u64 start, u64 end) start = roundup(start, ZONE_ALIGN); - printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", - nid, start, end); + printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); /* * Allocate node data. Try remap allocator first, node-local @@ -232,7 +232,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) } /* report and initialize */ - printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", + printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (!remapped && tnid != nid) @@ -291,14 +291,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) */ if (bi->end > bj->start && bi->start < bj->end) { if (bi->nid != bj->nid) { - pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->nid, bj->start, bj->end); + pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, + bj->nid, bj->start, bj->end - 1); return -EINVAL; } - pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->start, bj->end); + pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, + bj->start, bj->end - 1); } /* @@ -320,9 +320,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) } if (k < mi->nr_blks) continue; - printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", - bi->nid, bi->start, bi->end, bj->start, bj->end, - start, end); + printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, bj->start, + bj->end - 1, start, end - 1); bi->start = start; bi->end = end; numa_remove_memblk_from(j--, mi); @@ -616,8 +616,8 @@ static int __init dummy_numa_init(void) { printk(KERN_INFO "%s\n", numa_off ? "NUMA turned off" : "No NUMA configuration found"); - printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", - 0LLU, PFN_PHYS(max_pfn)); + printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n", + 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 871dd8868170..dbbbb47260cc 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -68,8 +68,8 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, numa_remove_memblk_from(phys_blk, pi); } - printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, - eb->start, eb->end, (eb->end - eb->start) >> 20); + printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n", + nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20); return 0; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f6ff57b7efa5..f11729fd019c 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -209,9 +209,8 @@ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, page = pfn_to_page(pfn); type = get_page_memtype(page); if (type != -1) { - printk(KERN_INFO "reserve_ram_pages_type failed " - "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", - start, end, type, req_type); + printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n", + start, end - 1, type, req_type); if (new_type) *new_type = type; @@ -314,9 +313,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, err = rbt_memtype_check_insert(new, new_type); if (err) { - printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " - "track %s, req %s\n", - start, end, cattr_name(new->type), cattr_name(req_type)); + printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + start, end - 1, + cattr_name(new->type), cattr_name(req_type)); kfree(new); spin_unlock(&memtype_lock); @@ -325,8 +324,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_unlock(&memtype_lock); - dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", - start, end, cattr_name(new->type), cattr_name(req_type), + dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", + start, end - 1, cattr_name(new->type), cattr_name(req_type), new_type ? cattr_name(*new_type) : "-"); return err; @@ -360,14 +359,14 @@ int free_memtype(u64 start, u64 end) spin_unlock(&memtype_lock); if (!entry) { - printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", - current->comm, current->pid, start, end); + printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", + current->comm, current->pid, start, end - 1); return -EINVAL; } kfree(entry); - dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); + dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); return 0; } @@ -491,9 +490,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, to); + printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n", + current->comm, from, to - 1); return 0; } cursor += PAGE_SIZE; @@ -554,12 +552,11 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) size; if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { - printk(KERN_INFO - "%s:%d ioremap_change_attr failed %s " - "for %Lx-%Lx\n", + printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " + "for [mem %#010Lx-%#010Lx]\n", current->comm, current->pid, cattr_name(flags), - base, (unsigned long long)(base + size)); + base, (unsigned long long)(base + size-1)); return -EINVAL; } return 0; @@ -591,12 +588,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, flags = lookup_memtype(paddr); if (want_flags != flags) { - printk(KERN_WARNING - "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", + printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_flags), (unsigned long long)paddr, - (unsigned long long)(paddr + size), + (unsigned long long)(paddr + size - 1), cattr_name(flags)); *vma_prot = __pgprot((pgprot_val(*vma_prot) & (~_PAGE_CACHE_MASK)) | @@ -614,11 +610,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, !is_new_memtype_allowed(paddr, size, want_flags, flags)) { free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d map pfn expected mapping type %s" - " for %Lx-%Lx, got %s\n", + " for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_flags), (unsigned long long)paddr, - (unsigned long long)(paddr + size), + (unsigned long long)(paddr + size - 1), cattr_name(flags)); return -EINVAL; } diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index efb5b4b93711..732af3a96183 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -176,8 +176,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return; } - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, - start, end); + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", + node, pxm, + (unsigned long long) start, (unsigned long long) end - 1); } void __init acpi_numa_arch_fixup(void) {} diff --git a/drivers/base/node.c b/drivers/base/node.c index 90aa2a11a933..af1a177216f1 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -592,11 +592,9 @@ static ssize_t print_nodes_state(enum node_states state, char *buf) { int n; - n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]); - if (n > 0 && PAGE_SIZE > n + 1) { - *(buf + n++) = '\n'; - *(buf + n++) = '\0'; - } + n = nodelist_scnprintf(buf, PAGE_SIZE-2, node_states[state]); + buf[n++] = '\n'; + buf[n] = '\0'; return n; } diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index ff4b8cfda585..04cb8c88d74b 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -50,6 +50,19 @@ config LEDS_LM3530 controlled manually or using PWM input or using ambient light automatically. +config LEDS_LM3533 + tristate "LED support for LM3533" + depends on LEDS_CLASS + depends on MFD_LM3533 + help + This option enables support for the LEDs on National Semiconductor / + TI LM3533 Lighting Power chips. + + The LEDs can be controlled directly, through PWM input, or by the + ambient-light-sensor interface. The chip supports + hardware-accelerated blinking with maximum on and off periods of 9.8 + and 77 seconds respectively. + config LEDS_LOCOMO tristate "LED Support for Locomo device" depends on LEDS_CLASS @@ -259,6 +272,14 @@ config LEDS_DA903X This option enables support for on-chip LED drivers found on Dialog Semiconductor DA9030/DA9034 PMICs. +config LEDS_DA9052 + tristate "Dialog DA9052/DA9053 LEDS" + depends on LEDS_CLASS + depends on PMIC_DA9052 + help + This option enables support for on-chip LED drivers found + on Dialog Semiconductor DA9052-BC and DA9053-AA/Bx PMICs. + config LEDS_DAC124S085 tristate "LED Support for DAC124S085 SPI DAC" depends on LEDS_CLASS @@ -471,4 +492,12 @@ config LEDS_TRIGGER_DEFAULT_ON comment "iptables trigger is under Netfilter config (LED target)" depends on LEDS_TRIGGERS +config LEDS_TRIGGER_TRANSIENT + tristate "LED Transient Trigger" + depends on LEDS_TRIGGERS + help + This allows one time activation of a transient state on + GPIO/PWM based hadrware. + If unsure, say Y. + endif # NEW_LEDS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 890481cb09f6..f8958cd6cf6e 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_LEDS_ATMEL_PWM) += leds-atmel-pwm.o obj-$(CONFIG_LEDS_BD2802) += leds-bd2802.o obj-$(CONFIG_LEDS_LOCOMO) += leds-locomo.o obj-$(CONFIG_LEDS_LM3530) += leds-lm3530.o +obj-$(CONFIG_LEDS_LM3533) += leds-lm3533.o obj-$(CONFIG_LEDS_MIKROTIK_RB532) += leds-rb532.o obj-$(CONFIG_LEDS_S3C24XX) += leds-s3c24xx.o obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o @@ -31,6 +32,7 @@ obj-$(CONFIG_LEDS_FSG) += leds-fsg.o obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o obj-$(CONFIG_LEDS_PCA9633) += leds-pca9633.o obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o +obj-$(CONFIG_LEDS_DA9052) += leds-da9052.o obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o obj-$(CONFIG_LEDS_PWM) += leds-pwm.o @@ -56,3 +58,4 @@ obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o +obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 5bff8439dc68..8ee92c81aec2 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -44,23 +44,18 @@ static ssize_t led_brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct led_classdev *led_cdev = dev_get_drvdata(dev); + unsigned long state; ssize_t ret = -EINVAL; - char *after; - unsigned long state = simple_strtoul(buf, &after, 10); - size_t count = after - buf; - if (isspace(*after)) - count++; + ret = kstrtoul(buf, 10, &state); + if (ret) + return ret; - if (count == size) { - ret = count; + if (state == LED_OFF) + led_trigger_remove(led_cdev); + led_set_brightness(led_cdev, state); - if (state == LED_OFF) - led_trigger_remove(led_cdev); - led_set_brightness(led_cdev, state); - } - - return ret; + return size; } static ssize_t led_max_brightness_show(struct device *dev, diff --git a/drivers/leds/leds-da9052.c b/drivers/leds/leds-da9052.c new file mode 100644 index 000000000000..58a5244c437e --- /dev/null +++ b/drivers/leds/leds-da9052.c @@ -0,0 +1,214 @@ +/* + * LED Driver for Dialog DA9052 PMICs. + * + * Copyright(c) 2012 Dialog Semiconductor Ltd. + * + * Author: David Dajun Chen <dchen@diasemi.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/leds.h> +#include <linux/workqueue.h> +#include <linux/slab.h> + +#include <linux/mfd/da9052/reg.h> +#include <linux/mfd/da9052/da9052.h> +#include <linux/mfd/da9052/pdata.h> + +#define DA9052_OPENDRAIN_OUTPUT 2 +#define DA9052_SET_HIGH_LVL_OUTPUT (1 << 3) +#define DA9052_MASK_UPPER_NIBBLE 0xF0 +#define DA9052_MASK_LOWER_NIBBLE 0x0F +#define DA9052_NIBBLE_SHIFT 4 +#define DA9052_MAX_BRIGHTNESS 0x5f + +struct da9052_led { + struct led_classdev cdev; + struct work_struct work; + struct da9052 *da9052; + unsigned char led_index; + unsigned char id; + int brightness; +}; + +static unsigned char led_reg[] = { + DA9052_LED_CONT_4_REG, + DA9052_LED_CONT_5_REG, +}; + +static int da9052_set_led_brightness(struct da9052_led *led) +{ + u8 val; + int error; + + val = (led->brightness & 0x7f) | DA9052_LED_CONT_DIM; + + error = da9052_reg_write(led->da9052, led_reg[led->led_index], val); + if (error < 0) + dev_err(led->da9052->dev, "Failed to set led brightness, %d\n", + error); + return error; +} + +static void da9052_led_work(struct work_struct *work) +{ + struct da9052_led *led = container_of(work, struct da9052_led, work); + + da9052_set_led_brightness(led); +} + +static void da9052_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct da9052_led *led; + + led = container_of(led_cdev, struct da9052_led, cdev); + led->brightness = value; + schedule_work(&led->work); +} + +static int da9052_configure_leds(struct da9052 *da9052) +{ + int error; + unsigned char register_value = DA9052_OPENDRAIN_OUTPUT + | DA9052_SET_HIGH_LVL_OUTPUT; + + error = da9052_reg_update(da9052, DA9052_GPIO_14_15_REG, + DA9052_MASK_LOWER_NIBBLE, + register_value); + + if (error < 0) { + dev_err(da9052->dev, "Failed to write GPIO 14-15 reg, %d\n", + error); + return error; + } + + error = da9052_reg_update(da9052, DA9052_GPIO_14_15_REG, + DA9052_MASK_UPPER_NIBBLE, + register_value << DA9052_NIBBLE_SHIFT); + if (error < 0) + dev_err(da9052->dev, "Failed to write GPIO 14-15 reg, %d\n", + error); + + return error; +} + +static int __devinit da9052_led_probe(struct platform_device *pdev) +{ + struct da9052_pdata *pdata; + struct da9052 *da9052; + struct led_platform_data *pled; + struct da9052_led *led = NULL; + int error = -ENODEV; + int i; + + da9052 = dev_get_drvdata(pdev->dev.parent); + pdata = da9052->dev->platform_data; + if (pdata == NULL) { + dev_err(&pdev->dev, "No platform data\n"); + goto err; + } + + pled = pdata->pled; + if (pled == NULL) { + dev_err(&pdev->dev, "No platform data for LED\n"); + goto err; + } + + led = devm_kzalloc(&pdev->dev, + sizeof(struct da9052_led) * pled->num_leds, + GFP_KERNEL); + if (led == NULL) { + dev_err(&pdev->dev, "Failed to alloc memory\n"); + error = -ENOMEM; + goto err; + } + + for (i = 0; i < pled->num_leds; i++) { + led[i].cdev.name = pled->leds[i].name; + led[i].cdev.brightness_set = da9052_led_set; + led[i].cdev.brightness = LED_OFF; + led[i].cdev.max_brightness = DA9052_MAX_BRIGHTNESS; + led[i].brightness = LED_OFF; + led[i].led_index = pled->leds[i].flags; + led[i].da9052 = dev_get_drvdata(pdev->dev.parent); + INIT_WORK(&led[i].work, da9052_led_work); + + error = led_classdev_register(pdev->dev.parent, &led[i].cdev); + if (error) { + dev_err(&pdev->dev, "Failed to register led %d\n", + led[i].led_index); + goto err_register; + } + + error = da9052_set_led_brightness(&led[i]); + if (error) { + dev_err(&pdev->dev, "Unable to init led %d\n", + led[i].led_index); + continue; + } + } + error = da9052_configure_leds(led->da9052); + if (error) { + dev_err(&pdev->dev, "Failed to configure GPIO LED%d\n", error); + goto err_register; + } + + platform_set_drvdata(pdev, led); + + return 0; + +err_register: + for (i = i - 1; i >= 0; i--) { + led_classdev_unregister(&led[i].cdev); + cancel_work_sync(&led[i].work); + } +err: + return error; +} + +static int __devexit da9052_led_remove(struct platform_device *pdev) +{ + struct da9052_led *led = platform_get_drvdata(pdev); + struct da9052_pdata *pdata; + struct da9052 *da9052; + struct led_platform_data *pled; + int i; + + da9052 = dev_get_drvdata(pdev->dev.parent); + pdata = da9052->dev->platform_data; + pled = pdata->pled; + + for (i = 0; i < pled->num_leds; i++) { + led[i].brightness = 0; + da9052_set_led_brightness(&led[i]); + led_classdev_unregister(&led[i].cdev); + cancel_work_sync(&led[i].work); + } + + return 0; +} + +static struct platform_driver da9052_led_driver = { + .driver = { + .name = "da9052-leds", + .owner = THIS_MODULE, + }, + .probe = da9052_led_probe, + .remove = __devexit_p(da9052_led_remove), +}; + +module_platform_driver(da9052_led_driver); + +MODULE_AUTHOR("Dialog Semiconductor Ltd <dchen@diasemi.com>"); +MODULE_DESCRIPTION("LED driver for Dialog DA9052 PMIC"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index 968fd5fef4fc..84ba6de8039c 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -113,6 +113,18 @@ struct lm3530_data { bool enable; }; +/* + * struct lm3530_als_data + * @config : value of ALS configuration register + * @imp_sel : value of ALS resistor select register + * @zone : values of ALS ZB(Zone Boundary) registers + */ +struct lm3530_als_data { + u8 config; + u8 imp_sel; + u8 zones[LM3530_ALS_ZB_MAX]; +}; + static const u8 lm3530_reg[LM3530_REG_MAX] = { LM3530_GEN_CONFIG, LM3530_ALS_CONFIG, @@ -141,29 +153,65 @@ static int lm3530_get_mode_from_str(const char *str) return -1; } +static void lm3530_als_configure(struct lm3530_platform_data *pdata, + struct lm3530_als_data *als) +{ + int i; + u32 als_vmin, als_vmax, als_vstep; + + if (pdata->als_vmax == 0) { + pdata->als_vmin = 0; + pdata->als_vmax = LM3530_ALS_WINDOW_mV; + } + + als_vmin = pdata->als_vmin; + als_vmax = pdata->als_vmax; + + if ((als_vmax - als_vmin) > LM3530_ALS_WINDOW_mV) + pdata->als_vmax = als_vmax = als_vmin + LM3530_ALS_WINDOW_mV; + + /* n zone boundary makes n+1 zones */ + als_vstep = (als_vmax - als_vmin) / (LM3530_ALS_ZB_MAX + 1); + + for (i = 0; i < LM3530_ALS_ZB_MAX; i++) + als->zones[i] = (((als_vmin + LM3530_ALS_OFFSET_mV) + + als_vstep + (i * als_vstep)) * LED_FULL) / 1000; + + als->config = + (pdata->als_avrg_time << LM3530_ALS_AVG_TIME_SHIFT) | + (LM3530_ENABLE_ALS) | + (pdata->als_input_mode << LM3530_ALS_SEL_SHIFT); + + als->imp_sel = + (pdata->als1_resistor_sel << LM3530_ALS1_IMP_SHIFT) | + (pdata->als2_resistor_sel << LM3530_ALS2_IMP_SHIFT); +} + static int lm3530_init_registers(struct lm3530_data *drvdata) { int ret = 0; int i; u8 gen_config; - u8 als_config = 0; u8 brt_ramp; - u8 als_imp_sel = 0; u8 brightness; u8 reg_val[LM3530_REG_MAX]; - u8 zones[LM3530_ALS_ZB_MAX]; - u32 als_vmin, als_vmax, als_vstep; struct lm3530_platform_data *pdata = drvdata->pdata; struct i2c_client *client = drvdata->client; struct lm3530_pwm_data *pwm = &pdata->pwm_data; + struct lm3530_als_data als; + + memset(&als, 0, sizeof(struct lm3530_als_data)); gen_config = (pdata->brt_ramp_law << LM3530_RAMP_LAW_SHIFT) | ((pdata->max_current & 7) << LM3530_MAX_CURR_SHIFT); switch (drvdata->mode) { case LM3530_BL_MODE_MANUAL: + gen_config |= LM3530_ENABLE_I2C; + break; case LM3530_BL_MODE_ALS: gen_config |= LM3530_ENABLE_I2C; + lm3530_als_configure(pdata, &als); break; case LM3530_BL_MODE_PWM: gen_config |= LM3530_ENABLE_PWM | LM3530_ENABLE_PWM_SIMPLE | @@ -171,38 +219,6 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) break; } - if (drvdata->mode == LM3530_BL_MODE_ALS) { - if (pdata->als_vmax == 0) { - pdata->als_vmin = 0; - pdata->als_vmax = LM3530_ALS_WINDOW_mV; - } - - als_vmin = pdata->als_vmin; - als_vmax = pdata->als_vmax; - - if ((als_vmax - als_vmin) > LM3530_ALS_WINDOW_mV) - pdata->als_vmax = als_vmax = - als_vmin + LM3530_ALS_WINDOW_mV; - - /* n zone boundary makes n+1 zones */ - als_vstep = (als_vmax - als_vmin) / (LM3530_ALS_ZB_MAX + 1); - - for (i = 0; i < LM3530_ALS_ZB_MAX; i++) - zones[i] = (((als_vmin + LM3530_ALS_OFFSET_mV) + - als_vstep + (i * als_vstep)) * LED_FULL) - / 1000; - - als_config = - (pdata->als_avrg_time << LM3530_ALS_AVG_TIME_SHIFT) | - (LM3530_ENABLE_ALS) | - (pdata->als_input_mode << LM3530_ALS_SEL_SHIFT); - - als_imp_sel = - (pdata->als1_resistor_sel << LM3530_ALS1_IMP_SHIFT) | - (pdata->als2_resistor_sel << LM3530_ALS2_IMP_SHIFT); - - } - brt_ramp = (pdata->brt_ramp_fall << LM3530_BRT_RAMP_FALL_SHIFT) | (pdata->brt_ramp_rise << LM3530_BRT_RAMP_RISE_SHIFT); @@ -215,14 +231,14 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) brightness = drvdata->led_dev.max_brightness; reg_val[0] = gen_config; /* LM3530_GEN_CONFIG */ - reg_val[1] = als_config; /* LM3530_ALS_CONFIG */ + reg_val[1] = als.config; /* LM3530_ALS_CONFIG */ reg_val[2] = brt_ramp; /* LM3530_BRT_RAMP_RATE */ - reg_val[3] = als_imp_sel; /* LM3530_ALS_IMP_SELECT */ + reg_val[3] = als.imp_sel; /* LM3530_ALS_IMP_SELECT */ reg_val[4] = brightness; /* LM3530_BRT_CTRL_REG */ - reg_val[5] = zones[0]; /* LM3530_ALS_ZB0_REG */ - reg_val[6] = zones[1]; /* LM3530_ALS_ZB1_REG */ - reg_val[7] = zones[2]; /* LM3530_ALS_ZB2_REG */ - reg_val[8] = zones[3]; /* LM3530_ALS_ZB3_REG */ + reg_val[5] = als.zones[0]; /* LM3530_ALS_ZB0_REG */ + reg_val[6] = als.zones[1]; /* LM3530_ALS_ZB1_REG */ + reg_val[7] = als.zones[2]; /* LM3530_ALS_ZB2_REG */ + reg_val[8] = als.zones[3]; /* LM3530_ALS_ZB3_REG */ reg_val[9] = LM3530_DEF_ZT_0; /* LM3530_ALS_Z0T_REG */ reg_val[10] = LM3530_DEF_ZT_1; /* LM3530_ALS_Z1T_REG */ reg_val[11] = LM3530_DEF_ZT_2; /* LM3530_ALS_Z2T_REG */ diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c new file mode 100644 index 000000000000..f56b6e7ffdac --- /dev/null +++ b/drivers/leds/leds-lm3533.c @@ -0,0 +1,785 @@ +/* + * leds-lm3533.c -- LM3533 LED driver + * + * Copyright (C) 2011-2012 Texas Instruments + * + * Author: Johan Hovold <jhovold@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/leds.h> +#include <linux/mfd/core.h> +#include <linux/mutex.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/workqueue.h> + +#include <linux/mfd/lm3533.h> + + +#define LM3533_LVCTRLBANK_MIN 2 +#define LM3533_LVCTRLBANK_MAX 5 +#define LM3533_LVCTRLBANK_COUNT 4 +#define LM3533_RISEFALLTIME_MAX 7 +#define LM3533_ALS_CHANNEL_LV_MIN 1 +#define LM3533_ALS_CHANNEL_LV_MAX 2 + +#define LM3533_REG_CTRLBANK_BCONF_BASE 0x1b +#define LM3533_REG_PATTERN_ENABLE 0x28 +#define LM3533_REG_PATTERN_LOW_TIME_BASE 0x71 +#define LM3533_REG_PATTERN_HIGH_TIME_BASE 0x72 +#define LM3533_REG_PATTERN_RISETIME_BASE 0x74 +#define LM3533_REG_PATTERN_FALLTIME_BASE 0x75 + +#define LM3533_REG_PATTERN_STEP 0x10 + +#define LM3533_REG_CTRLBANK_BCONF_MAPPING_MASK 0x04 +#define LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK 0x02 +#define LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK 0x01 + +#define LM3533_LED_FLAG_PATTERN_ENABLE 1 + + +struct lm3533_led { + struct lm3533 *lm3533; + struct lm3533_ctrlbank cb; + struct led_classdev cdev; + int id; + + struct mutex mutex; + unsigned long flags; + + struct work_struct work; + u8 new_brightness; +}; + + +static inline struct lm3533_led *to_lm3533_led(struct led_classdev *cdev) +{ + return container_of(cdev, struct lm3533_led, cdev); +} + +static inline int lm3533_led_get_ctrlbank_id(struct lm3533_led *led) +{ + return led->id + 2; +} + +static inline u8 lm3533_led_get_lv_reg(struct lm3533_led *led, u8 base) +{ + return base + led->id; +} + +static inline u8 lm3533_led_get_pattern(struct lm3533_led *led) +{ + return led->id; +} + +static inline u8 lm3533_led_get_pattern_reg(struct lm3533_led *led, + u8 base) +{ + return base + lm3533_led_get_pattern(led) * LM3533_REG_PATTERN_STEP; +} + +static int lm3533_led_pattern_enable(struct lm3533_led *led, int enable) +{ + u8 mask; + u8 val; + int pattern; + int state; + int ret = 0; + + dev_dbg(led->cdev.dev, "%s - %d\n", __func__, enable); + + mutex_lock(&led->mutex); + + state = test_bit(LM3533_LED_FLAG_PATTERN_ENABLE, &led->flags); + if ((enable && state) || (!enable && !state)) + goto out; + + pattern = lm3533_led_get_pattern(led); + mask = 1 << (2 * pattern); + + if (enable) + val = mask; + else + val = 0; + + ret = lm3533_update(led->lm3533, LM3533_REG_PATTERN_ENABLE, val, mask); + if (ret) { + dev_err(led->cdev.dev, "failed to enable pattern %d (%d)\n", + pattern, enable); + goto out; + } + + __change_bit(LM3533_LED_FLAG_PATTERN_ENABLE, &led->flags); +out: + mutex_unlock(&led->mutex); + + return ret; +} + +static void lm3533_led_work(struct work_struct *work) +{ + struct lm3533_led *led = container_of(work, struct lm3533_led, work); + + dev_dbg(led->cdev.dev, "%s - %u\n", __func__, led->new_brightness); + + if (led->new_brightness == 0) + lm3533_led_pattern_enable(led, 0); /* disable blink */ + + lm3533_ctrlbank_set_brightness(&led->cb, led->new_brightness); +} + +static void lm3533_led_set(struct led_classdev *cdev, + enum led_brightness value) +{ + struct lm3533_led *led = to_lm3533_led(cdev); + + dev_dbg(led->cdev.dev, "%s - %d\n", __func__, value); + + led->new_brightness = value; + schedule_work(&led->work); +} + +static enum led_brightness lm3533_led_get(struct led_classdev *cdev) +{ + struct lm3533_led *led = to_lm3533_led(cdev); + u8 val; + int ret; + + ret = lm3533_ctrlbank_get_brightness(&led->cb, &val); + if (ret) + return ret; + + dev_dbg(led->cdev.dev, "%s - %u\n", __func__, val); + + return val; +} + +/* Pattern generator defines (delays in us). */ +#define LM3533_LED_DELAY1_VMIN 0x00 +#define LM3533_LED_DELAY2_VMIN 0x3d +#define LM3533_LED_DELAY3_VMIN 0x80 + +#define LM3533_LED_DELAY1_VMAX (LM3533_LED_DELAY2_VMIN - 1) +#define LM3533_LED_DELAY2_VMAX (LM3533_LED_DELAY3_VMIN - 1) +#define LM3533_LED_DELAY3_VMAX 0xff + +#define LM3533_LED_DELAY1_TMIN 16384U +#define LM3533_LED_DELAY2_TMIN 1130496U +#define LM3533_LED_DELAY3_TMIN 10305536U + +#define LM3533_LED_DELAY1_TMAX 999424U +#define LM3533_LED_DELAY2_TMAX 9781248U +#define LM3533_LED_DELAY3_TMAX 76890112U + +/* t_step = (t_max - t_min) / (v_max - v_min) */ +#define LM3533_LED_DELAY1_TSTEP 16384 +#define LM3533_LED_DELAY2_TSTEP 131072 +#define LM3533_LED_DELAY3_TSTEP 524288 + +/* Delay limits for hardware accelerated blinking (in ms). */ +#define LM3533_LED_DELAY_ON_MAX \ + ((LM3533_LED_DELAY2_TMAX + LM3533_LED_DELAY2_TSTEP / 2) / 1000) +#define LM3533_LED_DELAY_OFF_MAX \ + ((LM3533_LED_DELAY3_TMAX + LM3533_LED_DELAY3_TSTEP / 2) / 1000) + +/* + * Returns linear map of *t from [t_min,t_max] to [v_min,v_max] with a step + * size of t_step, where + * + * t_step = (t_max - t_min) / (v_max - v_min) + * + * and updates *t to reflect the mapped value. + */ +static u8 time_to_val(unsigned *t, unsigned t_min, unsigned t_step, + u8 v_min, u8 v_max) +{ + unsigned val; + + val = (*t + t_step / 2 - t_min) / t_step + v_min; + + *t = t_step * (val - v_min) + t_min; + + return (u8)val; +} + +/* + * Returns time code corresponding to *delay (in ms) and updates *delay to + * reflect actual hardware delay. + * + * Hardware supports 256 discrete delay times, divided into three groups with + * the following ranges and step-sizes: + * + * [ 16, 999] [0x00, 0x3e] step 16 ms + * [ 1130, 9781] [0x3d, 0x7f] step 131 ms + * [10306, 76890] [0x80, 0xff] step 524 ms + * + * Note that delay group 3 is only available for delay_off. + */ +static u8 lm3533_led_get_hw_delay(unsigned *delay) +{ + unsigned t; + u8 val; + + t = *delay * 1000; + + if (t >= (LM3533_LED_DELAY2_TMAX + LM3533_LED_DELAY3_TMIN) / 2) { + t = clamp(t, LM3533_LED_DELAY3_TMIN, LM3533_LED_DELAY3_TMAX); + val = time_to_val(&t, LM3533_LED_DELAY3_TMIN, + LM3533_LED_DELAY3_TSTEP, + LM3533_LED_DELAY3_VMIN, + LM3533_LED_DELAY3_VMAX); + } else if (t >= (LM3533_LED_DELAY1_TMAX + LM3533_LED_DELAY2_TMIN) / 2) { + t = clamp(t, LM3533_LED_DELAY2_TMIN, LM3533_LED_DELAY2_TMAX); + val = time_to_val(&t, LM3533_LED_DELAY2_TMIN, + LM3533_LED_DELAY2_TSTEP, + LM3533_LED_DELAY2_VMIN, + LM3533_LED_DELAY2_VMAX); + } else { + t = clamp(t, LM3533_LED_DELAY1_TMIN, LM3533_LED_DELAY1_TMAX); + val = time_to_val(&t, LM3533_LED_DELAY1_TMIN, + LM3533_LED_DELAY1_TSTEP, + LM3533_LED_DELAY1_VMIN, + LM3533_LED_DELAY1_VMAX); + } + + *delay = (t + 500) / 1000; + + return val; +} + +/* + * Set delay register base to *delay (in ms) and update *delay to reflect + * actual hardware delay used. + */ +static u8 lm3533_led_delay_set(struct lm3533_led *led, u8 base, + unsigned long *delay) +{ + unsigned t; + u8 val; + u8 reg; + int ret; + + t = (unsigned)*delay; + + /* Delay group 3 is only available for low time (delay off). */ + if (base != LM3533_REG_PATTERN_LOW_TIME_BASE) + t = min(t, LM3533_LED_DELAY2_TMAX / 1000); + + val = lm3533_led_get_hw_delay(&t); + + dev_dbg(led->cdev.dev, "%s - %lu: %u (0x%02x)\n", __func__, + *delay, t, val); + reg = lm3533_led_get_pattern_reg(led, base); + ret = lm3533_write(led->lm3533, reg, val); + if (ret) + dev_err(led->cdev.dev, "failed to set delay (%02x)\n", reg); + + *delay = t; + + return ret; +} + +static int lm3533_led_delay_on_set(struct lm3533_led *led, unsigned long *t) +{ + return lm3533_led_delay_set(led, LM3533_REG_PATTERN_HIGH_TIME_BASE, t); +} + +static int lm3533_led_delay_off_set(struct lm3533_led *led, unsigned long *t) +{ + return lm3533_led_delay_set(led, LM3533_REG_PATTERN_LOW_TIME_BASE, t); +} + +static int lm3533_led_blink_set(struct led_classdev *cdev, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct lm3533_led *led = to_lm3533_led(cdev); + int ret; + + dev_dbg(led->cdev.dev, "%s - on = %lu, off = %lu\n", __func__, + *delay_on, *delay_off); + + if (*delay_on > LM3533_LED_DELAY_ON_MAX || + *delay_off > LM3533_LED_DELAY_OFF_MAX) + return -EINVAL; + + if (*delay_on == 0 && *delay_off == 0) { + *delay_on = 500; + *delay_off = 500; + } + + ret = lm3533_led_delay_on_set(led, delay_on); + if (ret) + return ret; + + ret = lm3533_led_delay_off_set(led, delay_off); + if (ret) + return ret; + + return lm3533_led_pattern_enable(led, 1); +} + +static ssize_t show_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", led->id); +} + +/* + * Pattern generator rise/fall times: + * + * 0 - 2048 us (default) + * 1 - 262 ms + * 2 - 524 ms + * 3 - 1.049 s + * 4 - 2.097 s + * 5 - 4.194 s + * 6 - 8.389 s + * 7 - 16.78 s + */ +static ssize_t show_risefalltime(struct device *dev, + struct device_attribute *attr, + char *buf, u8 base) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + ssize_t ret; + u8 reg; + u8 val; + + reg = lm3533_led_get_pattern_reg(led, base); + ret = lm3533_read(led->lm3533, reg, &val); + if (ret) + return ret; + + return scnprintf(buf, PAGE_SIZE, "%x\n", val); +} + +static ssize_t show_risetime(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_risefalltime(dev, attr, buf, + LM3533_REG_PATTERN_RISETIME_BASE); +} + +static ssize_t show_falltime(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_risefalltime(dev, attr, buf, + LM3533_REG_PATTERN_FALLTIME_BASE); +} + +static ssize_t store_risefalltime(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, u8 base) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + u8 val; + u8 reg; + int ret; + + if (kstrtou8(buf, 0, &val) || val > LM3533_RISEFALLTIME_MAX) + return -EINVAL; + + reg = lm3533_led_get_pattern_reg(led, base); + ret = lm3533_write(led->lm3533, reg, val); + if (ret) + return ret; + + return len; +} + +static ssize_t store_risetime(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_risefalltime(dev, attr, buf, len, + LM3533_REG_PATTERN_RISETIME_BASE); +} + +static ssize_t store_falltime(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_risefalltime(dev, attr, buf, len, + LM3533_REG_PATTERN_FALLTIME_BASE); +} + +static ssize_t show_als_channel(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + unsigned channel; + u8 reg; + u8 val; + int ret; + + reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE); + ret = lm3533_read(led->lm3533, reg, &val); + if (ret) + return ret; + + channel = (val & LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK) + 1; + + return scnprintf(buf, PAGE_SIZE, "%u\n", channel); +} + +static ssize_t store_als_channel(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + unsigned channel; + u8 reg; + u8 val; + u8 mask; + int ret; + + if (kstrtouint(buf, 0, &channel)) + return -EINVAL; + + if (channel < LM3533_ALS_CHANNEL_LV_MIN || + channel > LM3533_ALS_CHANNEL_LV_MAX) + return -EINVAL; + + reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE); + mask = LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK; + val = channel - 1; + + ret = lm3533_update(led->lm3533, reg, val, mask); + if (ret) + return ret; + + return len; +} + +static ssize_t show_als_en(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + bool enable; + u8 reg; + u8 val; + int ret; + + reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE); + ret = lm3533_read(led->lm3533, reg, &val); + if (ret) + return ret; + + enable = val & LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK; + + return scnprintf(buf, PAGE_SIZE, "%d\n", enable); +} + +static ssize_t store_als_en(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + unsigned enable; + u8 reg; + u8 mask; + u8 val; + int ret; + + if (kstrtouint(buf, 0, &enable)) + return -EINVAL; + + reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE); + mask = LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK; + + if (enable) + val = mask; + else + val = 0; + + ret = lm3533_update(led->lm3533, reg, val, mask); + if (ret) + return ret; + + return len; +} + +static ssize_t show_linear(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + u8 reg; + u8 val; + int linear; + int ret; + + reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE); + ret = lm3533_read(led->lm3533, reg, &val); + if (ret) + return ret; + + if (val & LM3533_REG_CTRLBANK_BCONF_MAPPING_MASK) + linear = 1; + else + linear = 0; + + return scnprintf(buf, PAGE_SIZE, "%x\n", linear); +} + +static ssize_t store_linear(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + unsigned long linear; + u8 reg; + u8 mask; + u8 val; + int ret; + + if (kstrtoul(buf, 0, &linear)) + return -EINVAL; + + reg = lm3533_led_get_lv_reg(led, LM3533_REG_CTRLBANK_BCONF_BASE); + mask = LM3533_REG_CTRLBANK_BCONF_MAPPING_MASK; + + if (linear) + val = mask; + else + val = 0; + + ret = lm3533_update(led->lm3533, reg, val, mask); + if (ret) + return ret; + + return len; +} + +static ssize_t show_pwm(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + u8 val; + int ret; + + ret = lm3533_ctrlbank_get_pwm(&led->cb, &val); + if (ret) + return ret; + + return scnprintf(buf, PAGE_SIZE, "%u\n", val); +} + +static ssize_t store_pwm(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + u8 val; + int ret; + + if (kstrtou8(buf, 0, &val)) + return -EINVAL; + + ret = lm3533_ctrlbank_set_pwm(&led->cb, val); + if (ret) + return ret; + + return len; +} + +static LM3533_ATTR_RW(als_channel); +static LM3533_ATTR_RW(als_en); +static LM3533_ATTR_RW(falltime); +static LM3533_ATTR_RO(id); +static LM3533_ATTR_RW(linear); +static LM3533_ATTR_RW(pwm); +static LM3533_ATTR_RW(risetime); + +static struct attribute *lm3533_led_attributes[] = { + &dev_attr_als_channel.attr, + &dev_attr_als_en.attr, + &dev_attr_falltime.attr, + &dev_attr_id.attr, + &dev_attr_linear.attr, + &dev_attr_pwm.attr, + &dev_attr_risetime.attr, + NULL, +}; + +static umode_t lm3533_led_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm3533_led *led = to_lm3533_led(led_cdev); + umode_t mode = attr->mode; + + if (attr == &dev_attr_als_channel.attr || + attr == &dev_attr_als_en.attr) { + if (!led->lm3533->have_als) + mode = 0; + } + + return mode; +}; + +static struct attribute_group lm3533_led_attribute_group = { + .is_visible = lm3533_led_attr_is_visible, + .attrs = lm3533_led_attributes +}; + +static int __devinit lm3533_led_setup(struct lm3533_led *led, + struct lm3533_led_platform_data *pdata) +{ + int ret; + + ret = lm3533_ctrlbank_set_max_current(&led->cb, pdata->max_current); + if (ret) + return ret; + + return lm3533_ctrlbank_set_pwm(&led->cb, pdata->pwm); +} + +static int __devinit lm3533_led_probe(struct platform_device *pdev) +{ + struct lm3533 *lm3533; + struct lm3533_led_platform_data *pdata; + struct lm3533_led *led; + int ret; + + dev_dbg(&pdev->dev, "%s\n", __func__); + + lm3533 = dev_get_drvdata(pdev->dev.parent); + if (!lm3533) + return -EINVAL; + + pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "no platform data\n"); + return -EINVAL; + } + + if (pdev->id < 0 || pdev->id >= LM3533_LVCTRLBANK_COUNT) { + dev_err(&pdev->dev, "illegal LED id %d\n", pdev->id); + return -EINVAL; + } + + led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL); + if (!led) + return -ENOMEM; + + led->lm3533 = lm3533; + led->cdev.name = pdata->name; + led->cdev.default_trigger = pdata->default_trigger; + led->cdev.brightness_set = lm3533_led_set; + led->cdev.brightness_get = lm3533_led_get; + led->cdev.blink_set = lm3533_led_blink_set; + led->cdev.brightness = LED_OFF; + led->id = pdev->id; + + mutex_init(&led->mutex); + INIT_WORK(&led->work, lm3533_led_work); + + /* The class framework makes a callback to get brightness during + * registration so use parent device (for error reporting) until + * registered. + */ + led->cb.lm3533 = lm3533; + led->cb.id = lm3533_led_get_ctrlbank_id(led); + led->cb.dev = lm3533->dev; + + platform_set_drvdata(pdev, led); + + ret = led_classdev_register(pdev->dev.parent, &led->cdev); + if (ret) { + dev_err(&pdev->dev, "failed to register LED %d\n", pdev->id); + return ret; + } + + led->cb.dev = led->cdev.dev; + + ret = sysfs_create_group(&led->cdev.dev->kobj, + &lm3533_led_attribute_group); + if (ret < 0) { + dev_err(&pdev->dev, "failed to create sysfs attributes\n"); + goto err_unregister; + } + + ret = lm3533_led_setup(led, pdata); + if (ret) + goto err_sysfs_remove; + + ret = lm3533_ctrlbank_enable(&led->cb); + if (ret) + goto err_sysfs_remove; + + return 0; + +err_sysfs_remove: + sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group); +err_unregister: + led_classdev_unregister(&led->cdev); + flush_work_sync(&led->work); + + return ret; +} + +static int __devexit lm3533_led_remove(struct platform_device *pdev) +{ + struct lm3533_led *led = platform_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s\n", __func__); + + lm3533_ctrlbank_disable(&led->cb); + sysfs_remove_group(&led->cdev.dev->kobj, &lm3533_led_attribute_group); + led_classdev_unregister(&led->cdev); + flush_work_sync(&led->work); + + return 0; +} + +static void lm3533_led_shutdown(struct platform_device *pdev) +{ + + struct lm3533_led *led = platform_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s\n", __func__); + + lm3533_ctrlbank_disable(&led->cb); + lm3533_led_set(&led->cdev, LED_OFF); /* disable blink */ + flush_work_sync(&led->work); +} + +static struct platform_driver lm3533_led_driver = { + .driver = { + .name = "lm3533-leds", + .owner = THIS_MODULE, + }, + .probe = lm3533_led_probe, + .remove = __devexit_p(lm3533_led_remove), + .shutdown = lm3533_led_shutdown, +}; +module_platform_driver(lm3533_led_driver); + +MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>"); +MODULE_DESCRIPTION("LM3533 LED driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:lm3533-leds"); diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 410a723b8691..23815624f35e 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -193,9 +193,14 @@ static int lp5521_load_program(struct lp5521_engine *eng, const u8 *pattern) /* move current engine to direct mode and remember the state */ ret = lp5521_set_engine_mode(eng, LP5521_CMD_DIRECT); + if (ret) + return ret; + /* Mode change requires min 500 us delay. 1 - 2 ms with margin */ usleep_range(1000, 2000); - ret |= lp5521_read(client, LP5521_REG_OP_MODE, &mode); + ret = lp5521_read(client, LP5521_REG_OP_MODE, &mode); + if (ret) + return ret; /* For loading, all the engines to load mode */ lp5521_write(client, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); @@ -211,8 +216,7 @@ static int lp5521_load_program(struct lp5521_engine *eng, const u8 *pattern) LP5521_PROG_MEM_SIZE, pattern); - ret |= lp5521_write(client, LP5521_REG_OP_MODE, mode); - return ret; + return lp5521_write(client, LP5521_REG_OP_MODE, mode); } static int lp5521_set_led_current(struct lp5521_chip *chip, int led, u8 curr) @@ -785,7 +789,7 @@ static int __devinit lp5521_probe(struct i2c_client *client, * LP5521_REG_ENABLE register will not have any effect - strange! */ ret = lp5521_read(client, LP5521_REG_R_CURRENT, &buf); - if (buf != LP5521_REG_R_CURR_DEFAULT) { + if (ret || buf != LP5521_REG_R_CURR_DEFAULT) { dev_err(&client->dev, "error in resetting chip\n"); goto fail2; } diff --git a/drivers/leds/leds-mc13783.c b/drivers/leds/leds-mc13783.c index 8bc491541550..4cc6a2e3df34 100644 --- a/drivers/leds/leds-mc13783.c +++ b/drivers/leds/leds-mc13783.c @@ -280,7 +280,7 @@ static int __devinit mc13783_led_probe(struct platform_device *pdev) return -EINVAL; } - led = kzalloc(sizeof(*led) * pdata->num_leds, GFP_KERNEL); + led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL); if (led == NULL) { dev_err(&pdev->dev, "failed to alloc memory\n"); return -ENOMEM; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index dcc3bc3d38db..5f462dbf0dbb 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -101,11 +101,16 @@ static const struct i2c_device_id pca955x_id[] = { }; MODULE_DEVICE_TABLE(i2c, pca955x_id); -struct pca955x_led { +struct pca955x { + struct mutex lock; + struct pca955x_led *leds; struct pca955x_chipdef *chipdef; struct i2c_client *client; +}; + +struct pca955x_led { + struct pca955x *pca955x; struct work_struct work; - spinlock_t lock; enum led_brightness brightness; struct led_classdev led_cdev; int led_num; /* 0 .. 15 potentially */ @@ -140,7 +145,7 @@ static inline u8 pca955x_ledsel(u8 oldval, int led_num, int state) */ static void pca955x_write_psc(struct i2c_client *client, int n, u8 val) { - struct pca955x_led *pca955x = i2c_get_clientdata(client); + struct pca955x *pca955x = i2c_get_clientdata(client); i2c_smbus_write_byte_data(client, pca95xx_num_input_regs(pca955x->chipdef->bits) + 2*n, @@ -156,7 +161,7 @@ static void pca955x_write_psc(struct i2c_client *client, int n, u8 val) */ static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val) { - struct pca955x_led *pca955x = i2c_get_clientdata(client); + struct pca955x *pca955x = i2c_get_clientdata(client); i2c_smbus_write_byte_data(client, pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + 2*n, @@ -169,7 +174,7 @@ static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val) */ static void pca955x_write_ls(struct i2c_client *client, int n, u8 val) { - struct pca955x_led *pca955x = i2c_get_clientdata(client); + struct pca955x *pca955x = i2c_get_clientdata(client); i2c_smbus_write_byte_data(client, pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n, @@ -182,7 +187,7 @@ static void pca955x_write_ls(struct i2c_client *client, int n, u8 val) */ static u8 pca955x_read_ls(struct i2c_client *client, int n) { - struct pca955x_led *pca955x = i2c_get_clientdata(client); + struct pca955x *pca955x = i2c_get_clientdata(client); return (u8) i2c_smbus_read_byte_data(client, pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n); @@ -190,18 +195,23 @@ static u8 pca955x_read_ls(struct i2c_client *client, int n) static void pca955x_led_work(struct work_struct *work) { - struct pca955x_led *pca955x; + struct pca955x_led *pca955x_led; + struct pca955x *pca955x; u8 ls; int chip_ls; /* which LSx to use (0-3 potentially) */ int ls_led; /* which set of bits within LSx to use (0-3) */ - pca955x = container_of(work, struct pca955x_led, work); - chip_ls = pca955x->led_num / 4; - ls_led = pca955x->led_num % 4; + pca955x_led = container_of(work, struct pca955x_led, work); + pca955x = pca955x_led->pca955x; + + chip_ls = pca955x_led->led_num / 4; + ls_led = pca955x_led->led_num % 4; + + mutex_lock(&pca955x->lock); ls = pca955x_read_ls(pca955x->client, chip_ls); - switch (pca955x->brightness) { + switch (pca955x_led->brightness) { case LED_FULL: ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON); break; @@ -219,12 +229,15 @@ static void pca955x_led_work(struct work_struct *work) * OFF, HALF, or FULL. But, this is probably better than * just turning off for all other values. */ - pca955x_write_pwm(pca955x->client, 1, 255-pca955x->brightness); + pca955x_write_pwm(pca955x->client, 1, + 255 - pca955x_led->brightness); ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK1); break; } pca955x_write_ls(pca955x->client, chip_ls, ls); + + mutex_unlock(&pca955x->lock); } static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness value) @@ -233,7 +246,6 @@ static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness v pca955x = container_of(led_cdev, struct pca955x_led, led_cdev); - spin_lock(&pca955x->lock); pca955x->brightness = value; /* @@ -241,14 +253,13 @@ static void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness v * can sleep. */ schedule_work(&pca955x->work); - - spin_unlock(&pca955x->lock); } static int __devinit pca955x_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct pca955x_led *pca955x; + struct pca955x *pca955x; + struct pca955x_led *pca955x_led; struct pca955x_chipdef *chip; struct i2c_adapter *adapter; struct led_platform_data *pdata; @@ -282,39 +293,48 @@ static int __devinit pca955x_probe(struct i2c_client *client, } } - pca955x = kzalloc(sizeof(*pca955x) * chip->bits, GFP_KERNEL); + pca955x = kzalloc(sizeof(*pca955x), GFP_KERNEL); if (!pca955x) return -ENOMEM; + pca955x->leds = kzalloc(sizeof(*pca955x_led) * chip->bits, GFP_KERNEL); + if (!pca955x->leds) { + err = -ENOMEM; + goto exit_nomem; + } + i2c_set_clientdata(client, pca955x); + mutex_init(&pca955x->lock); + pca955x->client = client; + pca955x->chipdef = chip; + for (i = 0; i < chip->bits; i++) { - pca955x[i].chipdef = chip; - pca955x[i].client = client; - pca955x[i].led_num = i; + pca955x_led = &pca955x->leds[i]; + pca955x_led->led_num = i; + pca955x_led->pca955x = pca955x; /* Platform data can specify LED names and default triggers */ if (pdata) { if (pdata->leds[i].name) - snprintf(pca955x[i].name, - sizeof(pca955x[i].name), "pca955x:%s", - pdata->leds[i].name); + snprintf(pca955x_led->name, + sizeof(pca955x_led->name), "pca955x:%s", + pdata->leds[i].name); if (pdata->leds[i].default_trigger) - pca955x[i].led_cdev.default_trigger = + pca955x_led->led_cdev.default_trigger = pdata->leds[i].default_trigger; } else { - snprintf(pca955x[i].name, sizeof(pca955x[i].name), + snprintf(pca955x_led->name, sizeof(pca955x_led->name), "pca955x:%d", i); } - spin_lock_init(&pca955x[i].lock); - - pca955x[i].led_cdev.name = pca955x[i].name; - pca955x[i].led_cdev.brightness_set = pca955x_led_set; + pca955x_led->led_cdev.name = pca955x_led->name; + pca955x_led->led_cdev.brightness_set = pca955x_led_set; - INIT_WORK(&pca955x[i].work, pca955x_led_work); + INIT_WORK(&pca955x_led->work, pca955x_led_work); - err = led_classdev_register(&client->dev, &pca955x[i].led_cdev); + err = led_classdev_register(&client->dev, + &pca955x_led->led_cdev); if (err < 0) goto exit; } @@ -337,10 +357,12 @@ static int __devinit pca955x_probe(struct i2c_client *client, exit: while (i--) { - led_classdev_unregister(&pca955x[i].led_cdev); - cancel_work_sync(&pca955x[i].work); + led_classdev_unregister(&pca955x->leds[i].led_cdev); + cancel_work_sync(&pca955x->leds[i].work); } + kfree(pca955x->leds); +exit_nomem: kfree(pca955x); return err; @@ -348,14 +370,15 @@ exit: static int __devexit pca955x_remove(struct i2c_client *client) { - struct pca955x_led *pca955x = i2c_get_clientdata(client); + struct pca955x *pca955x = i2c_get_clientdata(client); int i; for (i = 0; i < pca955x->chipdef->bits; i++) { - led_classdev_unregister(&pca955x[i].led_cdev); - cancel_work_sync(&pca955x[i].work); + led_classdev_unregister(&pca955x->leds[i].led_cdev); + cancel_work_sync(&pca955x->leds[i].work); } + kfree(pca955x->leds); kfree(pca955x); return 0; diff --git a/drivers/leds/ledtrig-backlight.c b/drivers/leds/ledtrig-backlight.c index 2b513a2ad7de..e2726867c5d4 100644 --- a/drivers/leds/ledtrig-backlight.c +++ b/drivers/leds/ledtrig-backlight.c @@ -120,6 +120,7 @@ static void bl_trig_activate(struct led_classdev *led) ret = fb_register_client(&n->notifier); if (ret) dev_err(led->dev, "unable to register backlight trigger\n"); + led->activated = true; return; @@ -133,10 +134,11 @@ static void bl_trig_deactivate(struct led_classdev *led) struct bl_trig_notifier *n = (struct bl_trig_notifier *) led->trigger_data; - if (n) { + if (led->activated) { device_remove_file(led->dev, &dev_attr_inverted); fb_unregister_client(&n->notifier); kfree(n); + led->activated = false; } } diff --git a/drivers/leds/ledtrig-gpio.c b/drivers/leds/ledtrig-gpio.c index ecc4bf3f37a9..f057c101b896 100644 --- a/drivers/leds/ledtrig-gpio.c +++ b/drivers/leds/ledtrig-gpio.c @@ -200,6 +200,7 @@ static void gpio_trig_activate(struct led_classdev *led) gpio_data->led = led; led->trigger_data = gpio_data; INIT_WORK(&gpio_data->work, gpio_trig_work); + led->activated = true; return; @@ -217,7 +218,7 @@ static void gpio_trig_deactivate(struct led_classdev *led) { struct gpio_trig_data *gpio_data = led->trigger_data; - if (gpio_data) { + if (led->activated) { device_remove_file(led->dev, &dev_attr_gpio); device_remove_file(led->dev, &dev_attr_inverted); device_remove_file(led->dev, &dev_attr_desired_brightness); @@ -225,6 +226,7 @@ static void gpio_trig_deactivate(struct led_classdev *led) if (gpio_data->gpio != 0) free_irq(gpio_to_irq(gpio_data->gpio), led); kfree(gpio_data); + led->activated = false; } } diff --git a/drivers/leds/ledtrig-heartbeat.c b/drivers/leds/ledtrig-heartbeat.c index 759c0bba4a8f..41dc76db4311 100644 --- a/drivers/leds/ledtrig-heartbeat.c +++ b/drivers/leds/ledtrig-heartbeat.c @@ -18,6 +18,7 @@ #include <linux/timer.h> #include <linux/sched.h> #include <linux/leds.h> +#include <linux/reboot.h> #include "leds.h" struct heartbeat_trig_data { @@ -83,15 +84,17 @@ static void heartbeat_trig_activate(struct led_classdev *led_cdev) led_heartbeat_function, (unsigned long) led_cdev); heartbeat_data->phase = 0; led_heartbeat_function(heartbeat_data->timer.data); + led_cdev->activated = true; } static void heartbeat_trig_deactivate(struct led_classdev *led_cdev) { struct heartbeat_trig_data *heartbeat_data = led_cdev->trigger_data; - if (heartbeat_data) { + if (led_cdev->activated) { del_timer_sync(&heartbeat_data->timer); kfree(heartbeat_data); + led_cdev->activated = false; } } @@ -101,13 +104,38 @@ static struct led_trigger heartbeat_led_trigger = { .deactivate = heartbeat_trig_deactivate, }; +static int heartbeat_reboot_notifier(struct notifier_block *nb, + unsigned long code, void *unused) +{ + led_trigger_unregister(&heartbeat_led_trigger); + return NOTIFY_DONE; +} + +static struct notifier_block heartbeat_reboot_nb = { + .notifier_call = heartbeat_reboot_notifier, +}; + +static struct notifier_block heartbeat_panic_nb = { + .notifier_call = heartbeat_reboot_notifier, +}; + static int __init heartbeat_trig_init(void) { - return led_trigger_register(&heartbeat_led_trigger); + int rc = led_trigger_register(&heartbeat_led_trigger); + + if (!rc) { + atomic_notifier_chain_register(&panic_notifier_list, + &heartbeat_panic_nb); + register_reboot_notifier(&heartbeat_reboot_nb); + } + return rc; } static void __exit heartbeat_trig_exit(void) { + unregister_reboot_notifier(&heartbeat_reboot_nb); + atomic_notifier_chain_unregister(&panic_notifier_list, + &heartbeat_panic_nb); led_trigger_unregister(&heartbeat_led_trigger); } diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c index 328c64c0841c..9010f7abaf2c 100644 --- a/drivers/leds/ledtrig-timer.c +++ b/drivers/leds/ledtrig-timer.c @@ -31,21 +31,17 @@ static ssize_t led_delay_on_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct led_classdev *led_cdev = dev_get_drvdata(dev); - int ret = -EINVAL; - char *after; - unsigned long state = simple_strtoul(buf, &after, 10); - size_t count = after - buf; - - if (isspace(*after)) - count++; - - if (count == size) { - led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off); - led_cdev->blink_delay_on = state; - ret = count; - } + unsigned long state; + ssize_t ret = -EINVAL; + + ret = kstrtoul(buf, 10, &state); + if (ret) + return ret; - return ret; + led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off); + led_cdev->blink_delay_on = state; + + return size; } static ssize_t led_delay_off_show(struct device *dev, @@ -60,21 +56,17 @@ static ssize_t led_delay_off_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct led_classdev *led_cdev = dev_get_drvdata(dev); - int ret = -EINVAL; - char *after; - unsigned long state = simple_strtoul(buf, &after, 10); - size_t count = after - buf; - - if (isspace(*after)) - count++; - - if (count == size) { - led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state); - led_cdev->blink_delay_off = state; - ret = count; - } + unsigned long state; + ssize_t ret = -EINVAL; - return ret; + ret = kstrtoul(buf, 10, &state); + if (ret) + return ret; + + led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state); + led_cdev->blink_delay_off = state; + + return size; } static DEVICE_ATTR(delay_on, 0644, led_delay_on_show, led_delay_on_store); @@ -95,8 +87,7 @@ static void timer_trig_activate(struct led_classdev *led_cdev) led_blink_set(led_cdev, &led_cdev->blink_delay_on, &led_cdev->blink_delay_off); - - led_cdev->trigger_data = (void *)1; + led_cdev->activated = true; return; @@ -106,9 +97,10 @@ err_out_delayon: static void timer_trig_deactivate(struct led_classdev *led_cdev) { - if (led_cdev->trigger_data) { + if (led_cdev->activated) { device_remove_file(led_cdev->dev, &dev_attr_delay_on); device_remove_file(led_cdev->dev, &dev_attr_delay_off); + led_cdev->activated = false; } /* Stop blinking */ diff --git a/drivers/leds/ledtrig-transient.c b/drivers/leds/ledtrig-transient.c new file mode 100644 index 000000000000..83179f435e1e --- /dev/null +++ b/drivers/leds/ledtrig-transient.c @@ -0,0 +1,237 @@ +/* + * LED Kernel Transient Trigger + * + * Copyright (C) 2012 Shuah Khan <shuahkhan@gmail.com> + * + * Based on Richard Purdie's ledtrig-timer.c and Atsushi Nemoto's + * ledtrig-heartbeat.c + * Design and use-case input from Jonas Bonn <jonas@southpole.se> and + * Neil Brown <neilb@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +/* + * Transient trigger allows one shot timer activation. Please refer to + * Documentation/leds/ledtrig-transient.txt for details +*/ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/leds.h> +#include "leds.h" + +struct transient_trig_data { + int activate; + int state; + int restore_state; + unsigned long duration; + struct timer_list timer; +}; + +static void transient_timer_function(unsigned long data) +{ + struct led_classdev *led_cdev = (struct led_classdev *) data; + struct transient_trig_data *transient_data = led_cdev->trigger_data; + + transient_data->activate = 0; + led_set_brightness(led_cdev, transient_data->restore_state); +} + +static ssize_t transient_activate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct transient_trig_data *transient_data = led_cdev->trigger_data; + + return sprintf(buf, "%d\n", transient_data->activate); +} + +static ssize_t transient_activate_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct transient_trig_data *transient_data = led_cdev->trigger_data; + unsigned long state; + ssize_t ret; + + ret = kstrtoul(buf, 10, &state); + if (ret) + return ret; + + if (state != 1 && state != 0) + return -EINVAL; + + /* cancel the running timer */ + if (state == 0 && transient_data->activate == 1) { + del_timer(&transient_data->timer); + transient_data->activate = state; + led_set_brightness(led_cdev, transient_data->restore_state); + return size; + } + + /* start timer if there is no active timer */ + if (state == 1 && transient_data->activate == 0 && + transient_data->duration != 0) { + transient_data->activate = state; + led_set_brightness(led_cdev, transient_data->state); + transient_data->restore_state = + (transient_data->state == LED_FULL) ? LED_OFF : LED_FULL; + mod_timer(&transient_data->timer, + jiffies + transient_data->duration); + } + + /* state == 0 && transient_data->activate == 0 + timer is not active - just return */ + /* state == 1 && transient_data->activate == 1 + timer is already active - just return */ + + return size; +} + +static ssize_t transient_duration_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct transient_trig_data *transient_data = led_cdev->trigger_data; + + return sprintf(buf, "%lu\n", transient_data->duration); +} + +static ssize_t transient_duration_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct transient_trig_data *transient_data = led_cdev->trigger_data; + unsigned long state; + ssize_t ret; + + ret = kstrtoul(buf, 10, &state); + if (ret) + return ret; + + transient_data->duration = state; + return size; +} + +static ssize_t transient_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct transient_trig_data *transient_data = led_cdev->trigger_data; + int state; + + state = (transient_data->state == LED_FULL) ? 1 : 0; + return sprintf(buf, "%d\n", state); +} + +static ssize_t transient_state_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct transient_trig_data *transient_data = led_cdev->trigger_data; + unsigned long state; + ssize_t ret; + + ret = kstrtoul(buf, 10, &state); + if (ret) + return ret; + + if (state != 1 && state != 0) + return -EINVAL; + + transient_data->state = (state == 1) ? LED_FULL : LED_OFF; + return size; +} + +static DEVICE_ATTR(activate, 0644, transient_activate_show, + transient_activate_store); +static DEVICE_ATTR(duration, 0644, transient_duration_show, + transient_duration_store); +static DEVICE_ATTR(state, 0644, transient_state_show, transient_state_store); + +static void transient_trig_activate(struct led_classdev *led_cdev) +{ + int rc; + struct transient_trig_data *tdata; + + tdata = kzalloc(sizeof(struct transient_trig_data), GFP_KERNEL); + if (!tdata) { + dev_err(led_cdev->dev, + "unable to allocate transient trigger\n"); + return; + } + led_cdev->trigger_data = tdata; + + rc = device_create_file(led_cdev->dev, &dev_attr_activate); + if (rc) + goto err_out; + + rc = device_create_file(led_cdev->dev, &dev_attr_duration); + if (rc) + goto err_out_duration; + + rc = device_create_file(led_cdev->dev, &dev_attr_state); + if (rc) + goto err_out_state; + + setup_timer(&tdata->timer, transient_timer_function, + (unsigned long) led_cdev); + led_cdev->activated = true; + + return; + +err_out_state: + device_remove_file(led_cdev->dev, &dev_attr_duration); +err_out_duration: + device_remove_file(led_cdev->dev, &dev_attr_activate); +err_out: + dev_err(led_cdev->dev, "unable to register transient trigger\n"); + led_cdev->trigger_data = NULL; + kfree(tdata); +} + +static void transient_trig_deactivate(struct led_classdev *led_cdev) +{ + struct transient_trig_data *transient_data = led_cdev->trigger_data; + + if (led_cdev->activated) { + del_timer_sync(&transient_data->timer); + led_set_brightness(led_cdev, transient_data->restore_state); + device_remove_file(led_cdev->dev, &dev_attr_activate); + device_remove_file(led_cdev->dev, &dev_attr_duration); + device_remove_file(led_cdev->dev, &dev_attr_state); + led_cdev->trigger_data = NULL; + led_cdev->activated = false; + kfree(transient_data); + } +} + +static struct led_trigger transient_trigger = { + .name = "transient", + .activate = transient_trig_activate, + .deactivate = transient_trig_deactivate, +}; + +static int __init transient_trig_init(void) +{ + return led_trigger_register(&transient_trigger); +} + +static void __exit transient_trig_exit(void) +{ + led_trigger_unregister(&transient_trigger); +} + +module_init(transient_trig_init); +module_exit(transient_trig_exit); + +MODULE_AUTHOR("Shuah Khan <shuahkhan@gmail.com>"); +MODULE_DESCRIPTION("Transient LED trigger"); +MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index ee79ce64d9df..57787d87d9a4 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1104,6 +1104,7 @@ static int __devinit toshiba_acpi_add(struct acpi_device *acpi_dev) mutex_init(&dev->mutex); + memset(&props, 0, sizeof(props)); props.type = BACKLIGHT_PLATFORM; props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1; dev->backlight_dev = backlight_device_register("toshiba", diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 4161bfe462cd..08cbdb900a18 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -620,27 +620,6 @@ config RTC_DRV_MSM6242 This driver can also be built as a module. If so, the module will be called rtc-msm6242. -config RTC_DRV_IMXDI - tristate "Freescale IMX DryIce Real Time Clock" - depends on ARCH_MX25 - depends on RTC_CLASS - help - Support for Freescale IMX DryIce RTC - - This driver can also be built as a module, if so, the module - will be called "rtc-imxdi". - -config RTC_MXC - tristate "Freescale MXC Real Time Clock" - depends on ARCH_MXC - depends on RTC_CLASS - help - If you say yes here you get support for the Freescale MXC - RTC module. - - This driver can also be built as a module, if so, the module - will be called "rtc-mxc". - config RTC_DRV_BQ4802 tristate "TI BQ4802" help @@ -738,6 +717,16 @@ config RTC_DRV_DAVINCI This driver can also be built as a module. If so, the module will be called rtc-davinci. +config RTC_DRV_IMXDI + tristate "Freescale IMX DryIce Real Time Clock" + depends on SOC_IMX25 + depends on RTC_CLASS + help + Support for Freescale IMX DryIce RTC + + This driver can also be built as a module, if so, the module + will be called "rtc-imxdi". + config RTC_DRV_OMAP tristate "TI OMAP1" depends on ARCH_OMAP15XX || ARCH_OMAP16XX || ARCH_OMAP730 || ARCH_DAVINCI_DA8XX @@ -1087,4 +1076,15 @@ config RTC_DRV_LOONGSON1 This driver can also be built as a module. If so, the module will be called rtc-ls1x. +config RTC_DRV_MXC + tristate "Freescale MXC Real Time Clock" + depends on ARCH_MXC + depends on RTC_CLASS + help + If you say yes here you get support for the Freescale MXC + RTC module. + + This driver can also be built as a module, if so, the module + will be called "rtc-mxc". + endif # RTC_CLASS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 727ae7786e6c..2973921c30d8 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -61,7 +61,7 @@ obj-$(CONFIG_RTC_DRV_M41T94) += rtc-m41t94.o obj-$(CONFIG_RTC_DRV_M48T35) += rtc-m48t35.o obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o -obj-$(CONFIG_RTC_MXC) += rtc-mxc.o +obj-$(CONFIG_RTC_DRV_MXC) += rtc-mxc.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o obj-$(CONFIG_RTC_DRV_MAX8998) += rtc-max8998.o diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index c293d0cdb104..836710ce750e 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -17,8 +17,7 @@ #include <linux/string.h> #include <linux/rtc.h> #include <linux/bcd.h> - - +#include <linux/rtc/ds1307.h> /* * We can't determine type by probing, but if we expect pre-Linux code @@ -92,7 +91,8 @@ enum ds_type { # define DS1337_BIT_A2I 0x02 # define DS1337_BIT_A1I 0x01 #define DS1339_REG_ALARM1_SECS 0x07 -#define DS1339_REG_TRICKLE 0x10 + +#define DS13XX_TRICKLE_CHARGER_MAGIC 0xa0 #define RX8025_REG_CTRL1 0x0e # define RX8025_BIT_2412 0x20 @@ -124,6 +124,7 @@ struct chip_desc { unsigned alarm:1; u16 nvram_offset; u16 nvram_size; + u16 trickle_charger_reg; }; static const struct chip_desc chips[last_ds_type] = { @@ -140,6 +141,13 @@ static const struct chip_desc chips[last_ds_type] = { }, [ds_1339] = { .alarm = 1, + .trickle_charger_reg = 0x10, + }, + [ds_1340] = { + .trickle_charger_reg = 0x08, + }, + [ds_1388] = { + .trickle_charger_reg = 0x0a, }, [ds_3231] = { .alarm = 1, @@ -619,6 +627,7 @@ static int __devinit ds1307_probe(struct i2c_client *client, struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); int want_irq = false; unsigned char *buf; + struct ds1307_platform_data *pdata = client->dev.platform_data; static const int bbsqi_bitpos[] = { [ds_1337] = 0, [ds_1339] = DS1339_BIT_BBSQI, @@ -637,7 +646,10 @@ static int __devinit ds1307_probe(struct i2c_client *client, ds1307->client = client; ds1307->type = id->driver_data; - ds1307->offset = 0; + + if (pdata && pdata->trickle_charger_setup && chip->trickle_charger_reg) + i2c_smbus_write_byte_data(client, chip->trickle_charger_reg, + DS13XX_TRICKLE_CHARGER_MAGIC | pdata->trickle_charger_setup); buf = ds1307->regs; if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) { diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 14a42a1edc66..9602278ff988 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -127,7 +127,7 @@ static const struct attribute_group ep93xx_rtc_sysfs_files = { .attrs = ep93xx_rtc_attrs, }; -static int __init ep93xx_rtc_probe(struct platform_device *pdev) +static int __devinit ep93xx_rtc_probe(struct platform_device *pdev) { struct ep93xx_rtc *ep93xx_rtc; struct resource *res; @@ -174,7 +174,7 @@ exit: return err; } -static int __exit ep93xx_rtc_remove(struct platform_device *pdev) +static int __devexit ep93xx_rtc_remove(struct platform_device *pdev) { struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev); @@ -186,31 +186,19 @@ static int __exit ep93xx_rtc_remove(struct platform_device *pdev) return 0; } -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:ep93xx-rtc"); - static struct platform_driver ep93xx_rtc_driver = { .driver = { .name = "ep93xx-rtc", .owner = THIS_MODULE, }, - .remove = __exit_p(ep93xx_rtc_remove), + .probe = ep93xx_rtc_probe, + .remove = __devexit_p(ep93xx_rtc_remove), }; -static int __init ep93xx_rtc_init(void) -{ - return platform_driver_probe(&ep93xx_rtc_driver, ep93xx_rtc_probe); -} - -static void __exit ep93xx_rtc_exit(void) -{ - platform_driver_unregister(&ep93xx_rtc_driver); -} +module_platform_driver(ep93xx_rtc_driver); MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>"); MODULE_DESCRIPTION("EP93XX RTC driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); - -module_init(ep93xx_rtc_init); -module_exit(ep93xx_rtc_exit); +MODULE_ALIAS("platform:ep93xx-rtc"); diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 63c72189c64b..d5218553741f 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -19,6 +19,7 @@ #include <linux/rtc.h> #include <linux/slab.h> #include <linux/io.h> +#include <linux/of.h> /* * Clock and Power control register offsets @@ -386,13 +387,22 @@ static const struct dev_pm_ops lpc32xx_rtc_pm_ops = { #define LPC32XX_RTC_PM_OPS NULL #endif +#ifdef CONFIG_OF +static const struct of_device_id lpc32xx_rtc_match[] = { + { .compatible = "nxp,lpc3220-rtc" }, + { } +}; +MODULE_DEVICE_TABLE(of, lpc32xx_rtc_match); +#endif + static struct platform_driver lpc32xx_rtc_driver = { .probe = lpc32xx_rtc_probe, .remove = __devexit_p(lpc32xx_rtc_remove), .driver = { .name = RTC_NAME, .owner = THIS_MODULE, - .pm = LPC32XX_RTC_PM_OPS + .pm = LPC32XX_RTC_PM_OPS, + .of_match_table = of_match_ptr(lpc32xx_rtc_match), }, }; diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c index 10f1c29436ec..efab3d48cb15 100644 --- a/drivers/rtc/rtc-m41t93.c +++ b/drivers/rtc/rtc-m41t93.c @@ -48,6 +48,7 @@ static inline int m41t93_set_reg(struct spi_device *spi, u8 addr, u8 data) static int m41t93_set_time(struct device *dev, struct rtc_time *tm) { struct spi_device *spi = to_spi_device(dev); + int tmp; u8 buf[9] = {0x80}; /* write cmd + 8 data bytes */ u8 * const data = &buf[1]; /* ptr to first data byte */ @@ -62,6 +63,30 @@ static int m41t93_set_time(struct device *dev, struct rtc_time *tm) return -EINVAL; } + tmp = spi_w8r8(spi, M41T93_REG_FLAGS); + if (tmp < 0) + return tmp; + + if (tmp & M41T93_FLAG_OF) { + dev_warn(&spi->dev, "OF bit is set, resetting.\n"); + m41t93_set_reg(spi, M41T93_REG_FLAGS, tmp & ~M41T93_FLAG_OF); + + tmp = spi_w8r8(spi, M41T93_REG_FLAGS); + if (tmp < 0) { + return tmp; + } else if (tmp & M41T93_FLAG_OF) { + /* OF cannot be immediately reset: oscillator has to be + * restarted. */ + u8 reset_osc = buf[M41T93_REG_ST_SEC] | M41T93_FLAG_ST; + + dev_warn(&spi->dev, + "OF bit is still set, kickstarting clock.\n"); + m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc); + reset_osc &= ~M41T93_FLAG_ST; + m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc); + } + } + data[M41T93_REG_SSEC] = 0; data[M41T93_REG_ST_SEC] = bin2bcd(tm->tm_sec); data[M41T93_REG_MIN] = bin2bcd(tm->tm_min); @@ -89,10 +114,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm) 1. halt bit (HT) is set: the clock is running but update of readout registers has been disabled due to power failure. This is normal case after poweron. Time is valid after resetting HT bit. - 2. oscillator fail bit (OF) is set. Oscillator has be stopped and - time is invalid: - a) OF can be immeditely reset. - b) OF cannot be immediately reset: oscillator has to be restarted. + 2. oscillator fail bit (OF) is set: time is invalid. */ tmp = spi_w8r8(spi, M41T93_REG_ALM_HOUR_HT); if (tmp < 0) @@ -110,21 +132,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm) if (tmp & M41T93_FLAG_OF) { ret = -EINVAL; - dev_warn(&spi->dev, "OF bit is set, resetting.\n"); - m41t93_set_reg(spi, M41T93_REG_FLAGS, tmp & ~M41T93_FLAG_OF); - - tmp = spi_w8r8(spi, M41T93_REG_FLAGS); - if (tmp < 0) - return tmp; - else if (tmp & M41T93_FLAG_OF) { - u8 reset_osc = buf[M41T93_REG_ST_SEC] | M41T93_FLAG_ST; - - dev_warn(&spi->dev, - "OF bit is still set, kickstarting clock.\n"); - m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc); - reset_osc &= ~M41T93_FLAG_ST; - m41t93_set_reg(spi, M41T93_REG_ST_SEC, reset_osc); - } + dev_warn(&spi->dev, "OF bit is set, write time to restart.\n"); } if (tmp & M41T93_FLAG_BL) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index bc0677de1996..97a3284bb7c6 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -64,6 +64,7 @@ struct pcf8563 { * 1970...2069. */ int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */ + int voltage_low; /* incicates if a low_voltage was detected */ }; /* @@ -86,9 +87,11 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm) return -EIO; } - if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) + if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) { + pcf8563->voltage_low = 1; dev_info(&client->dev, "low voltage detected, date/time is not reliable.\n"); + } dev_dbg(&client->dev, "%s: raw data is st1=%02x, st2=%02x, sec=%02x, min=%02x, hr=%02x, " @@ -173,6 +176,44 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm) return 0; } +#ifdef CONFIG_RTC_INTF_DEV +static int pcf8563_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + struct pcf8563 *pcf8563 = i2c_get_clientdata(to_i2c_client(dev)); + struct rtc_time tm; + + switch (cmd) { + case RTC_VL_READ: + if (pcf8563->voltage_low) + dev_info(dev, "low voltage detected, date/time is not reliable.\n"); + + if (copy_to_user((void __user *)arg, &pcf8563->voltage_low, + sizeof(int))) + return -EFAULT; + return 0; + case RTC_VL_CLR: + /* + * Clear the VL bit in the seconds register in case + * the time has not been set already (which would + * have cleared it). This does not really matter + * because of the cached voltage_low value but do it + * anyway for consistency. + */ + if (pcf8563_get_datetime(to_i2c_client(dev), &tm)) + pcf8563_set_datetime(to_i2c_client(dev), &tm); + + /* Clear the cached value. */ + pcf8563->voltage_low = 0; + + return 0; + default: + return -ENOIOCTLCMD; + } +} +#else +#define pcf8563_rtc_ioctl NULL +#endif + static int pcf8563_rtc_read_time(struct device *dev, struct rtc_time *tm) { return pcf8563_get_datetime(to_i2c_client(dev), tm); @@ -184,6 +225,7 @@ static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm) } static const struct rtc_class_ops pcf8563_rtc_ops = { + .ioctl = pcf8563_rtc_ioctl, .read_time = pcf8563_rtc_read_time, .set_time = pcf8563_rtc_set_time, }; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index f027c063fb20..cc0533994f6e 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -220,17 +220,9 @@ static irqreturn_t pl031_interrupt(int irq, void *dev_id) unsigned long events = 0; rtcmis = readl(ldata->base + RTC_MIS); - if (rtcmis) { - writel(rtcmis, ldata->base + RTC_ICR); - - if (rtcmis & RTC_BIT_AI) - events |= (RTC_AF | RTC_IRQF); - - /* Timer interrupt is only available in ST variants */ - if ((rtcmis & RTC_BIT_PI) && - (ldata->hw_designer == AMBA_VENDOR_ST)) - events |= (RTC_PF | RTC_IRQF); - + if (rtcmis & RTC_BIT_AI) { + writel(RTC_BIT_AI, ldata->base + RTC_ICR); + events |= (RTC_AF | RTC_IRQF); rtc_update_irq(ldata->rtc, 1, events); return IRQ_HANDLED; diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 3f3a29752369..7e6af0b22f17 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -670,6 +670,7 @@ static int s3c_rtc_resume(struct platform_device *pdev) #define s3c_rtc_resume NULL #endif +#ifdef CONFIG_OF static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = { [TYPE_S3C2410] = { TYPE_S3C2410 }, [TYPE_S3C2416] = { TYPE_S3C2416 }, @@ -677,7 +678,6 @@ static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = { [TYPE_S3C64XX] = { TYPE_S3C64XX }, }; -#ifdef CONFIG_OF static const struct of_device_id s3c_rtc_dt_match[] = { { .compatible = "samsung,s3c2410-rtc", diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c index e38da0dc4187..1f76320e545b 100644 --- a/drivers/rtc/rtc-spear.c +++ b/drivers/rtc/rtc-spear.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/rtc.h> #include <linux/slab.h> @@ -519,6 +520,14 @@ static void spear_rtc_shutdown(struct platform_device *pdev) clk_disable(config->clk); } +#ifdef CONFIG_OF +static const struct of_device_id spear_rtc_id_table[] = { + { .compatible = "st,spear600-rtc" }, + {} +}; +MODULE_DEVICE_TABLE(of, spear_rtc_id_table); +#endif + static struct platform_driver spear_rtc_driver = { .probe = spear_rtc_probe, .remove = __devexit_p(spear_rtc_remove), @@ -527,6 +536,7 @@ static struct platform_driver spear_rtc_driver = { .shutdown = spear_rtc_shutdown, .driver = { .name = "rtc-spear", + .of_match_table = of_match_ptr(spear_rtc_id_table), }, }; diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 75259fe38602..c006025cecc8 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -309,7 +309,8 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev) struct resource *res; int ret; - info = kzalloc(sizeof(struct tegra_rtc_info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(struct tegra_rtc_info), + GFP_KERNEL); if (!info) return -ENOMEM; @@ -317,29 +318,18 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev) if (!res) { dev_err(&pdev->dev, "Unable to allocate resources for device.\n"); - ret = -EBUSY; - goto err_free_info; + return -EBUSY; } - if (!request_mem_region(res->start, resource_size(res), pdev->name)) { - dev_err(&pdev->dev, - "Unable to request mem region for device.\n"); - ret = -EBUSY; - goto err_free_info; + info->rtc_base = devm_request_and_ioremap(&pdev->dev, res); + if (!info->rtc_base) { + dev_err(&pdev->dev, "Unable to request mem region and grab IOs for device.\n"); + return -EBUSY; } info->tegra_rtc_irq = platform_get_irq(pdev, 0); - if (info->tegra_rtc_irq <= 0) { - ret = -EBUSY; - goto err_release_mem_region; - } - - info->rtc_base = ioremap_nocache(res->start, resource_size(res)); - if (!info->rtc_base) { - dev_err(&pdev->dev, "Unable to grab IOs for device.\n"); - ret = -EBUSY; - goto err_release_mem_region; - } + if (info->tegra_rtc_irq <= 0) + return -EBUSY; /* set context info. */ info->pdev = pdev; @@ -362,11 +352,12 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Unable to register device (err=%d).\n", ret); - goto err_iounmap; + return ret; } - ret = request_irq(info->tegra_rtc_irq, tegra_rtc_irq_handler, - IRQF_TRIGGER_HIGH, "rtc alarm", &pdev->dev); + ret = devm_request_irq(&pdev->dev, info->tegra_rtc_irq, + tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH, + "rtc alarm", &pdev->dev); if (ret) { dev_err(&pdev->dev, "Unable to request interrupt for device (err=%d).\n", @@ -380,12 +371,6 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev) err_dev_unreg: rtc_device_unregister(info->rtc_dev); -err_iounmap: - iounmap(info->rtc_base); -err_release_mem_region: - release_mem_region(res->start, resource_size(res)); -err_free_info: - kfree(info); return ret; } @@ -393,17 +378,8 @@ err_free_info: static int __devexit tegra_rtc_remove(struct platform_device *pdev) { struct tegra_rtc_info *info = platform_get_drvdata(pdev); - struct resource *res; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -EBUSY; - free_irq(info->tegra_rtc_irq, &pdev->dev); rtc_device_unregister(info->rtc_dev); - iounmap(info->rtc_base); - release_mem_region(res->start, resource_size(res)); - kfree(info); platform_set_drvdata(pdev, NULL); diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 4511420849bc..e84dbecd0991 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/falloc.h> #include <linux/miscdevice.h> #include <linux/security.h> #include <linux/mm.h> @@ -363,11 +364,12 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) mutex_lock(&ashmem_mutex); list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { - struct inode *inode = range->asma->file->f_dentry->d_inode; loff_t start = range->pgstart * PAGE_SIZE; - loff_t end = (range->pgend + 1) * PAGE_SIZE - 1; + loff_t end = (range->pgend + 1) * PAGE_SIZE; - vmtruncate_range(inode, start, end); + do_fallocate(range->asma->file, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, end - start); range->purged = ASHMEM_WAS_PURGED; lru_del(range); diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index af16884491ed..fa2b03750316 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -184,6 +184,18 @@ config BACKLIGHT_GENERIC known as the Corgi backlight driver. If you have a Sharp Zaurus SL-C7xx, SL-Cxx00 or SL-6000x say y. +config BACKLIGHT_LM3533 + tristate "Backlight Driver for LM3533" + depends on BACKLIGHT_CLASS_DEVICE + depends on MFD_LM3533 + help + Say Y to enable the backlight driver for National Semiconductor / TI + LM3533 Lighting Power chips. + + The backlights can be controlled directly, through PWM input, or by + the ambient-light-sensor interface. The chip supports 256 brightness + levels. + config BACKLIGHT_LOCOMO tristate "Sharp LOCOMO LCD/Backlight Driver" depends on SHARP_LOCOMO diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 36855ae887d6..a2ac9cfbaf6b 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_BACKLIGHT_EP93XX) += ep93xx_bl.o obj-$(CONFIG_BACKLIGHT_GENERIC) += generic_bl.o obj-$(CONFIG_BACKLIGHT_HP700) += jornada720_bl.o obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o +obj-$(CONFIG_BACKLIGHT_LM3533) += lm3533_bl.o obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c index 4911ea7989c8..df5db99af23d 100644 --- a/drivers/video/backlight/adp5520_bl.c +++ b/drivers/video/backlight/adp5520_bl.c @@ -160,7 +160,7 @@ static ssize_t adp5520_store(struct device *dev, const char *buf, unsigned long val; int ret; - ret = strict_strtoul(buf, 10, &val); + ret = kstrtoul(buf, 10, &val); if (ret) return ret; @@ -214,7 +214,7 @@ static ssize_t adp5520_bl_daylight_max_store(struct device *dev, struct adp5520_bl *data = dev_get_drvdata(dev); int ret; - ret = strict_strtoul(buf, 10, &data->cached_daylight_max); + ret = kstrtoul(buf, 10, &data->cached_daylight_max); if (ret < 0) return ret; diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c index 550dbf0bb896..77d1fdba597f 100644 --- a/drivers/video/backlight/adp8860_bl.c +++ b/drivers/video/backlight/adp8860_bl.c @@ -222,7 +222,8 @@ static int __devinit adp8860_led_probe(struct i2c_client *client) struct led_info *cur_led; int ret, i; - led = kzalloc(sizeof(*led) * pdata->num_leds, GFP_KERNEL); + led = devm_kzalloc(&client->dev, sizeof(*led) * pdata->num_leds, + GFP_KERNEL); if (led == NULL) { dev_err(&client->dev, "failed to alloc memory\n"); return -ENOMEM; @@ -236,7 +237,7 @@ static int __devinit adp8860_led_probe(struct i2c_client *client) if (ret) { dev_err(&client->dev, "failed to write\n"); - goto err_free; + return ret; } for (i = 0; i < pdata->num_leds; ++i) { @@ -291,9 +292,6 @@ static int __devinit adp8860_led_probe(struct i2c_client *client) cancel_work_sync(&led[i].work); } - err_free: - kfree(led); - return ret; } @@ -309,7 +307,6 @@ static int __devexit adp8860_led_remove(struct i2c_client *client) cancel_work_sync(&data->led[i].work); } - kfree(data->led); return 0; } #else @@ -451,7 +448,7 @@ static ssize_t adp8860_store(struct device *dev, const char *buf, unsigned long val; int ret; - ret = strict_strtoul(buf, 10, &val); + ret = kstrtoul(buf, 10, &val); if (ret) return ret; @@ -501,7 +498,7 @@ static ssize_t adp8860_bl_l1_daylight_max_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct adp8860_bl *data = dev_get_drvdata(dev); - int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); + int ret = kstrtoul(buf, 10, &data->cached_daylight_max); if (ret) return ret; @@ -608,7 +605,7 @@ static ssize_t adp8860_bl_ambient_light_zone_store(struct device *dev, uint8_t reg_val; int ret; - ret = strict_strtoul(buf, 10, &val); + ret = kstrtoul(buf, 10, &val); if (ret) return ret; @@ -675,13 +672,13 @@ static int __devinit adp8860_probe(struct i2c_client *client, return -EINVAL; } - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL); if (data == NULL) return -ENOMEM; ret = adp8860_read(client, ADP8860_MFDVID, ®_val); if (ret < 0) - goto out2; + return ret; switch (ADP8860_MANID(reg_val)) { case ADP8863_MANUFID: @@ -694,8 +691,7 @@ static int __devinit adp8860_probe(struct i2c_client *client, break; default: dev_err(&client->dev, "failed to probe\n"); - ret = -ENODEV; - goto out2; + return -ENODEV; } /* It's confirmed that the DEVID field is actually a REVID */ @@ -717,8 +713,7 @@ static int __devinit adp8860_probe(struct i2c_client *client, &client->dev, data, &adp8860_bl_ops, &props); if (IS_ERR(bl)) { dev_err(&client->dev, "failed to register backlight\n"); - ret = PTR_ERR(bl); - goto out2; + return PTR_ERR(bl); } bl->props.brightness = ADP8860_MAX_BRIGHTNESS; @@ -756,8 +751,6 @@ out: &adp8860_bl_attr_group); out1: backlight_device_unregister(bl); -out2: - kfree(data); return ret; } @@ -776,7 +769,6 @@ static int __devexit adp8860_remove(struct i2c_client *client) &adp8860_bl_attr_group); backlight_device_unregister(data->bl); - kfree(data); return 0; } diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c index 9be58c6f18f1..edf7f91c8e61 100644 --- a/drivers/video/backlight/adp8870_bl.c +++ b/drivers/video/backlight/adp8870_bl.c @@ -244,8 +244,8 @@ static int __devinit adp8870_led_probe(struct i2c_client *client) struct led_info *cur_led; int ret, i; - - led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL); + led = devm_kzalloc(&client->dev, pdata->num_leds * sizeof(*led), + GFP_KERNEL); if (led == NULL) { dev_err(&client->dev, "failed to alloc memory\n"); return -ENOMEM; @@ -253,17 +253,17 @@ static int __devinit adp8870_led_probe(struct i2c_client *client) ret = adp8870_write(client, ADP8870_ISCLAW, pdata->led_fade_law); if (ret) - goto err_free; + return ret; ret = adp8870_write(client, ADP8870_ISCT1, (pdata->led_on_time & 0x3) << 6); if (ret) - goto err_free; + return ret; ret = adp8870_write(client, ADP8870_ISCF, FADE_VAL(pdata->led_fade_in, pdata->led_fade_out)); if (ret) - goto err_free; + return ret; for (i = 0; i < pdata->num_leds; ++i) { cur_led = &pdata->leds[i]; @@ -317,9 +317,6 @@ static int __devinit adp8870_led_probe(struct i2c_client *client) cancel_work_sync(&led[i].work); } - err_free: - kfree(led); - return ret; } @@ -335,7 +332,6 @@ static int __devexit adp8870_led_remove(struct i2c_client *client) cancel_work_sync(&data->led[i].work); } - kfree(data->led); return 0; } #else @@ -572,7 +568,7 @@ static ssize_t adp8870_store(struct device *dev, const char *buf, unsigned long val; int ret; - ret = strict_strtoul(buf, 10, &val); + ret = kstrtoul(buf, 10, &val); if (ret) return ret; @@ -652,7 +648,7 @@ static ssize_t adp8870_bl_l1_daylight_max_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct adp8870_bl *data = dev_get_drvdata(dev); - int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); + int ret = kstrtoul(buf, 10, &data->cached_daylight_max); if (ret) return ret; @@ -794,7 +790,7 @@ static ssize_t adp8870_bl_ambient_light_zone_store(struct device *dev, uint8_t reg_val; int ret; - ret = strict_strtoul(buf, 10, &val); + ret = kstrtoul(buf, 10, &val); if (ret) return ret; @@ -874,7 +870,7 @@ static int __devinit adp8870_probe(struct i2c_client *client, return -ENODEV; } - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -894,8 +890,7 @@ static int __devinit adp8870_probe(struct i2c_client *client, &client->dev, data, &adp8870_bl_ops, &props); if (IS_ERR(bl)) { dev_err(&client->dev, "failed to register backlight\n"); - ret = PTR_ERR(bl); - goto out2; + return PTR_ERR(bl); } data->bl = bl; @@ -930,8 +925,6 @@ out: &adp8870_bl_attr_group); out1: backlight_device_unregister(bl); -out2: - kfree(data); return ret; } @@ -950,7 +943,6 @@ static int __devexit adp8870_remove(struct i2c_client *client) &adp8870_bl_attr_group); backlight_device_unregister(data->bl); - kfree(data); return 0; } diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index 7bdadc790117..3729238e7096 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -482,7 +482,7 @@ static int __devinit ams369fg06_probe(struct spi_device *spi) struct backlight_device *bd = NULL; struct backlight_properties props; - lcd = kzalloc(sizeof(struct ams369fg06), GFP_KERNEL); + lcd = devm_kzalloc(&spi->dev, sizeof(struct ams369fg06), GFP_KERNEL); if (!lcd) return -ENOMEM; @@ -492,7 +492,7 @@ static int __devinit ams369fg06_probe(struct spi_device *spi) ret = spi_setup(spi); if (ret < 0) { dev_err(&spi->dev, "spi setup failed.\n"); - goto out_free_lcd; + return ret; } lcd->spi = spi; @@ -501,15 +501,13 @@ static int __devinit ams369fg06_probe(struct spi_device *spi) lcd->lcd_pd = spi->dev.platform_data; if (!lcd->lcd_pd) { dev_err(&spi->dev, "platform data is NULL\n"); - goto out_free_lcd; + return -EFAULT; } ld = lcd_device_register("ams369fg06", &spi->dev, lcd, &ams369fg06_lcd_ops); - if (IS_ERR(ld)) { - ret = PTR_ERR(ld); - goto out_free_lcd; - } + if (IS_ERR(ld)) + return PTR_ERR(ld); lcd->ld = ld; @@ -547,8 +545,6 @@ static int __devinit ams369fg06_probe(struct spi_device *spi) out_lcd_unregister: lcd_device_unregister(ld); -out_free_lcd: - kfree(lcd); return ret; } @@ -559,7 +555,6 @@ static int __devexit ams369fg06_remove(struct spi_device *spi) ams369fg06_power(lcd, FB_BLANK_POWERDOWN); backlight_device_unregister(lcd->bd); lcd_device_unregister(lcd->ld); - kfree(lcd); return 0; } @@ -619,7 +614,6 @@ static void ams369fg06_shutdown(struct spi_device *spi) static struct spi_driver ams369fg06_driver = { .driver = { .name = "ams369fg06", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, .probe = ams369fg06_probe, diff --git a/drivers/video/backlight/apple_bl.c b/drivers/video/backlight/apple_bl.c index a523b255e124..9dc73ac3709a 100644 --- a/drivers/video/backlight/apple_bl.c +++ b/drivers/video/backlight/apple_bl.c @@ -16,6 +16,8 @@ * get at the firmware code in order to figure out what it's actually doing. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -25,6 +27,7 @@ #include <linux/pci.h> #include <linux/acpi.h> #include <linux/atomic.h> +#include <linux/apple_bl.h> static struct backlight_device *apple_backlight_device; @@ -39,8 +42,6 @@ struct hw_data { static const struct hw_data *hw_data; -#define DRIVER "apple_backlight: " - /* Module parameters. */ static int debug; module_param_named(debug, debug, int, 0644); @@ -60,8 +61,7 @@ static int intel_chipset_send_intensity(struct backlight_device *bd) int intensity = bd->props.brightness; if (debug) - printk(KERN_DEBUG DRIVER "setting brightness to %d\n", - intensity); + pr_debug("setting brightness to %d\n", intensity); intel_chipset_set_brightness(intensity); return 0; @@ -76,8 +76,7 @@ static int intel_chipset_get_intensity(struct backlight_device *bd) intensity = inb(0xb3) >> 4; if (debug) - printk(KERN_DEBUG DRIVER "read brightness of %d\n", - intensity); + pr_debug("read brightness of %d\n", intensity); return intensity; } @@ -107,8 +106,7 @@ static int nvidia_chipset_send_intensity(struct backlight_device *bd) int intensity = bd->props.brightness; if (debug) - printk(KERN_DEBUG DRIVER "setting brightness to %d\n", - intensity); + pr_debug("setting brightness to %d\n", intensity); nvidia_chipset_set_brightness(intensity); return 0; @@ -123,8 +121,7 @@ static int nvidia_chipset_get_intensity(struct backlight_device *bd) intensity = inb(0x52f) >> 4; if (debug) - printk(KERN_DEBUG DRIVER "read brightness of %d\n", - intensity); + pr_debug("read brightness of %d\n", intensity); return intensity; } @@ -149,7 +146,7 @@ static int __devinit apple_bl_add(struct acpi_device *dev) host = pci_get_bus_and_slot(0, 0); if (!host) { - printk(KERN_ERR DRIVER "unable to find PCI host\n"); + pr_err("unable to find PCI host\n"); return -ENODEV; } @@ -161,7 +158,7 @@ static int __devinit apple_bl_add(struct acpi_device *dev) pci_dev_put(host); if (!hw_data) { - printk(KERN_ERR DRIVER "unknown hardware\n"); + pr_err("unknown hardware\n"); return -ENODEV; } diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index bf5b1ece7160..297db2fa91f5 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -5,6 +5,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> @@ -123,7 +125,7 @@ static ssize_t backlight_store_power(struct device *dev, rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { - pr_debug("backlight: set power to %lu\n", power); + pr_debug("set power to %lu\n", power); if (bd->props.power != power) { bd->props.power = power; backlight_update_status(bd); @@ -161,8 +163,7 @@ static ssize_t backlight_store_brightness(struct device *dev, if (brightness > bd->props.max_brightness) rc = -EINVAL; else { - pr_debug("backlight: set brightness to %lu\n", - brightness); + pr_debug("set brightness to %lu\n", brightness); bd->props.brightness = brightness; backlight_update_status(bd); rc = count; @@ -378,8 +379,8 @@ static int __init backlight_class_init(void) { backlight_class = class_create(THIS_MODULE, "backlight"); if (IS_ERR(backlight_class)) { - printk(KERN_WARNING "Unable to create backlight class; errno = %ld\n", - PTR_ERR(backlight_class)); + pr_warn("Unable to create backlight class; errno = %ld\n", + PTR_ERR(backlight_class)); return PTR_ERR(backlight_class); } diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index 6dab13fe562e..23d732677ba1 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -544,7 +544,7 @@ static int __devinit corgi_lcd_probe(struct spi_device *spi) return -EINVAL; } - lcd = kzalloc(sizeof(struct corgi_lcd), GFP_KERNEL); + lcd = devm_kzalloc(&spi->dev, sizeof(struct corgi_lcd), GFP_KERNEL); if (!lcd) { dev_err(&spi->dev, "failed to allocate memory\n"); return -ENOMEM; @@ -554,10 +554,9 @@ static int __devinit corgi_lcd_probe(struct spi_device *spi) lcd->lcd_dev = lcd_device_register("corgi_lcd", &spi->dev, lcd, &corgi_lcd_ops); - if (IS_ERR(lcd->lcd_dev)) { - ret = PTR_ERR(lcd->lcd_dev); - goto err_free_lcd; - } + if (IS_ERR(lcd->lcd_dev)) + return PTR_ERR(lcd->lcd_dev); + lcd->power = FB_BLANK_POWERDOWN; lcd->mode = (pdata) ? pdata->init_mode : CORGI_LCD_MODE_VGA; @@ -591,8 +590,6 @@ err_unregister_bl: backlight_device_unregister(lcd->bl_dev); err_unregister_lcd: lcd_device_unregister(lcd->lcd_dev); -err_free_lcd: - kfree(lcd); return ret; } @@ -613,7 +610,6 @@ static int __devexit corgi_lcd_remove(struct spi_device *spi) corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN); lcd_device_unregister(lcd->lcd_dev); - kfree(lcd); return 0; } diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c index 22489eb5f3e0..37bae801e23b 100644 --- a/drivers/video/backlight/cr_bllcd.c +++ b/drivers/video/backlight/cr_bllcd.c @@ -27,6 +27,8 @@ * Alan Hourihane <alanh-at-tungstengraphics-dot-com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -180,14 +182,13 @@ static int cr_backlight_probe(struct platform_device *pdev) lpc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, CRVML_DEVICE_LPC, NULL); if (!lpc_dev) { - printk("INTEL CARILLO RANCH LPC not found.\n"); + pr_err("INTEL CARILLO RANCH LPC not found.\n"); return -ENODEV; } pci_read_config_byte(lpc_dev, CRVML_REG_GPIOEN, &dev_en); if (!(dev_en & CRVML_GPIOEN_BIT)) { - printk(KERN_ERR - "Carillo Ranch GPIO device was not enabled.\n"); + pr_err("Carillo Ranch GPIO device was not enabled.\n"); pci_dev_put(lpc_dev); return -ENODEV; } @@ -270,7 +271,7 @@ static int __init cr_backlight_init(void) return PTR_ERR(crp); } - printk("Carillo Ranch Backlight Driver Initialized.\n"); + pr_info("Carillo Ranch Backlight Driver Initialized.\n"); return 0; } diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c index 30e19681a30b..573c7ece0fde 100644 --- a/drivers/video/backlight/da903x_bl.c +++ b/drivers/video/backlight/da903x_bl.c @@ -136,6 +136,7 @@ static int da903x_backlight_probe(struct platform_device *pdev) da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2, DA9034_WLED_ISET(pdata->output_current)); + memset(&props, 0, sizeof(props)); props.type = BACKLIGHT_RAW; props.max_brightness = max_brightness; bl = backlight_device_register(pdev->name, data->da903x_dev, data, diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c index 9ce6170c1860..8c660fcd250d 100644 --- a/drivers/video/backlight/generic_bl.c +++ b/drivers/video/backlight/generic_bl.c @@ -9,6 +9,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -106,7 +108,7 @@ static int genericbl_probe(struct platform_device *pdev) generic_backlight_device = bd; - printk("Generic Backlight Driver Initialized.\n"); + pr_info("Generic Backlight Driver Initialized.\n"); return 0; } @@ -120,7 +122,7 @@ static int genericbl_remove(struct platform_device *pdev) backlight_device_unregister(bd); - printk("Generic Backlight Driver Unloaded\n"); + pr_info("Generic Backlight Driver Unloaded\n"); return 0; } diff --git a/drivers/video/backlight/ili9320.c b/drivers/video/backlight/ili9320.c index 5118a9f029ab..6c9399341bcf 100644 --- a/drivers/video/backlight/ili9320.c +++ b/drivers/video/backlight/ili9320.c @@ -220,7 +220,7 @@ int __devinit ili9320_probe_spi(struct spi_device *spi, /* allocate and initialse our state */ - ili = kzalloc(sizeof(struct ili9320), GFP_KERNEL); + ili = devm_kzalloc(&spi->dev, sizeof(struct ili9320), GFP_KERNEL); if (ili == NULL) { dev_err(dev, "no memory for device\n"); return -ENOMEM; @@ -240,8 +240,7 @@ int __devinit ili9320_probe_spi(struct spi_device *spi, lcd = lcd_device_register("ili9320", dev, ili, &ili9320_ops); if (IS_ERR(lcd)) { dev_err(dev, "failed to register lcd device\n"); - ret = PTR_ERR(lcd); - goto err_free; + return PTR_ERR(lcd); } ili->lcd = lcd; @@ -259,9 +258,6 @@ int __devinit ili9320_probe_spi(struct spi_device *spi, err_unregister: lcd_device_unregister(lcd); - err_free: - kfree(ili); - return ret; } @@ -272,7 +268,6 @@ int __devexit ili9320_remove(struct ili9320 *ili) ili9320_power(ili, FB_BLANK_POWERDOWN); lcd_device_unregister(ili->lcd); - kfree(ili); return 0; } diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c index 2f8af5d786ab..16f593b64427 100644 --- a/drivers/video/backlight/jornada720_bl.c +++ b/drivers/video/backlight/jornada720_bl.c @@ -9,6 +9,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/backlight.h> #include <linux/device.h> #include <linux/fb.h> @@ -38,7 +40,7 @@ static int jornada_bl_get_brightness(struct backlight_device *bd) ret = jornada_ssp_byte(GETBRIGHTNESS); if (jornada_ssp_byte(GETBRIGHTNESS) != TXDUMMY) { - printk(KERN_ERR "bl : get brightness timeout\n"); + pr_err("get brightness timeout\n"); jornada_ssp_end(); return -ETIMEDOUT; } else /* exchange txdummy for value */ @@ -59,7 +61,7 @@ static int jornada_bl_update_status(struct backlight_device *bd) if ((bd->props.power != FB_BLANK_UNBLANK) || (bd->props.fb_blank != FB_BLANK_UNBLANK)) { ret = jornada_ssp_byte(BRIGHTNESSOFF); if (ret != TXDUMMY) { - printk(KERN_INFO "bl : brightness off timeout\n"); + pr_info("brightness off timeout\n"); /* turn off backlight */ PPSR &= ~PPC_LDD1; PPDR |= PPC_LDD1; @@ -70,7 +72,7 @@ static int jornada_bl_update_status(struct backlight_device *bd) /* send command to our mcu */ if (jornada_ssp_byte(SETBRIGHTNESS) != TXDUMMY) { - printk(KERN_INFO "bl : failed to set brightness\n"); + pr_info("failed to set brightness\n"); ret = -ETIMEDOUT; goto out; } @@ -81,7 +83,7 @@ static int jornada_bl_update_status(struct backlight_device *bd) but due to physical layout it is equal to 0, so we simply invert the value (MAX VALUE - NEW VALUE). */ if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness) != TXDUMMY) { - printk(KERN_ERR "bl : set brightness failed\n"); + pr_err("set brightness failed\n"); ret = -ETIMEDOUT; } @@ -113,7 +115,7 @@ static int jornada_bl_probe(struct platform_device *pdev) if (IS_ERR(bd)) { ret = PTR_ERR(bd); - printk(KERN_ERR "bl : failed to register device, err=%x\n", ret); + pr_err("failed to register device, err=%x\n", ret); return ret; } @@ -125,7 +127,7 @@ static int jornada_bl_probe(struct platform_device *pdev) jornada_bl_update_status(bd); platform_set_drvdata(pdev, bd); - printk(KERN_INFO "HP Jornada 700 series backlight driver\n"); + pr_info("HP Jornada 700 series backlight driver\n"); return 0; } diff --git a/drivers/video/backlight/jornada720_lcd.c b/drivers/video/backlight/jornada720_lcd.c index 22d231a17e3c..635b30523fd5 100644 --- a/drivers/video/backlight/jornada720_lcd.c +++ b/drivers/video/backlight/jornada720_lcd.c @@ -9,6 +9,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/device.h> #include <linux/fb.h> #include <linux/kernel.h> @@ -44,7 +46,7 @@ static int jornada_lcd_get_contrast(struct lcd_device *dev) jornada_ssp_start(); if (jornada_ssp_byte(GETCONTRAST) != TXDUMMY) { - printk(KERN_ERR "lcd: get contrast failed\n"); + pr_err("get contrast failed\n"); jornada_ssp_end(); return -ETIMEDOUT; } else { @@ -65,7 +67,7 @@ static int jornada_lcd_set_contrast(struct lcd_device *dev, int value) /* push the new value */ if (jornada_ssp_byte(value) != TXDUMMY) { - printk(KERN_ERR "lcd : set contrast failed\n"); + pr_err("set contrast failed\n"); jornada_ssp_end(); return -ETIMEDOUT; } @@ -103,7 +105,7 @@ static int jornada_lcd_probe(struct platform_device *pdev) if (IS_ERR(lcd_device)) { ret = PTR_ERR(lcd_device); - printk(KERN_ERR "lcd : failed to register device\n"); + pr_err("failed to register device\n"); return ret; } diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index 6022b67285ec..40f606a86093 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/device.h> #include <linux/kernel.h> #include <linux/delay.h> @@ -159,7 +161,8 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) return -EINVAL; } - priv = kzalloc(sizeof(struct l4f00242t03_priv), GFP_KERNEL); + priv = devm_kzalloc(&spi->dev, sizeof(struct l4f00242t03_priv), + GFP_KERNEL); if (priv == NULL) { dev_err(&spi->dev, "No memory for this device.\n"); @@ -177,7 +180,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) if (ret) { dev_err(&spi->dev, "Unable to get the lcd l4f00242t03 reset gpio.\n"); - goto err; + return ret; } ret = gpio_request_one(pdata->data_enable_gpio, GPIOF_OUT_INIT_LOW, @@ -185,7 +188,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) if (ret) { dev_err(&spi->dev, "Unable to get the lcd l4f00242t03 data en gpio.\n"); - goto err2; + goto err; } priv->io_reg = regulator_get(&spi->dev, "vdd"); @@ -193,7 +196,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) ret = PTR_ERR(priv->io_reg); dev_err(&spi->dev, "%s: Unable to get the IO regulator\n", __func__); - goto err3; + goto err2; } priv->core_reg = regulator_get(&spi->dev, "vcore"); @@ -201,14 +204,14 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) ret = PTR_ERR(priv->core_reg); dev_err(&spi->dev, "%s: Unable to get the core regulator\n", __func__); - goto err4; + goto err3; } priv->ld = lcd_device_register("l4f00242t03", &spi->dev, priv, &l4f_ops); if (IS_ERR(priv->ld)) { ret = PTR_ERR(priv->ld); - goto err5; + goto err4; } /* Init the LCD */ @@ -220,16 +223,14 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) return 0; -err5: - regulator_put(priv->core_reg); err4: - regulator_put(priv->io_reg); + regulator_put(priv->core_reg); err3: - gpio_free(pdata->data_enable_gpio); + regulator_put(priv->io_reg); err2: - gpio_free(pdata->reset_gpio); + gpio_free(pdata->data_enable_gpio); err: - kfree(priv); + gpio_free(pdata->reset_gpio); return ret; } @@ -250,8 +251,6 @@ static int __devexit l4f00242t03_remove(struct spi_device *spi) regulator_put(priv->io_reg); regulator_put(priv->core_reg); - kfree(priv); - return 0; } diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 79c1b0d609a8..a5d0d024bb92 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -5,6 +5,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> @@ -32,6 +34,8 @@ static int fb_notifier_callback(struct notifier_block *self, case FB_EVENT_BLANK: case FB_EVENT_MODE_CHANGE: case FB_EVENT_MODE_CHANGE_ALL: + case FB_EARLY_EVENT_BLANK: + case FB_R_EARLY_EVENT_BLANK: break; default: return 0; @@ -46,6 +50,14 @@ static int fb_notifier_callback(struct notifier_block *self, if (event == FB_EVENT_BLANK) { if (ld->ops->set_power) ld->ops->set_power(ld, *(int *)evdata->data); + } else if (event == FB_EARLY_EVENT_BLANK) { + if (ld->ops->early_set_power) + ld->ops->early_set_power(ld, + *(int *)evdata->data); + } else if (event == FB_R_EARLY_EVENT_BLANK) { + if (ld->ops->r_early_set_power) + ld->ops->r_early_set_power(ld, + *(int *)evdata->data); } else { if (ld->ops->set_mode) ld->ops->set_mode(ld, evdata->data); @@ -106,7 +118,7 @@ static ssize_t lcd_store_power(struct device *dev, mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->set_power) { - pr_debug("lcd: set power to %lu\n", power); + pr_debug("set power to %lu\n", power); ld->ops->set_power(ld, power); rc = count; } @@ -142,7 +154,7 @@ static ssize_t lcd_store_contrast(struct device *dev, mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->set_contrast) { - pr_debug("lcd: set contrast to %lu\n", contrast); + pr_debug("set contrast to %lu\n", contrast); ld->ops->set_contrast(ld, contrast); rc = count; } @@ -253,8 +265,8 @@ static int __init lcd_class_init(void) { lcd_class = class_create(THIS_MODULE, "lcd"); if (IS_ERR(lcd_class)) { - printk(KERN_WARNING "Unable to create backlight class; errno = %ld\n", - PTR_ERR(lcd_class)); + pr_warn("Unable to create backlight class; errno = %ld\n", + PTR_ERR(lcd_class)); return PTR_ERR(lcd_class); } diff --git a/drivers/video/backlight/ld9040.c b/drivers/video/backlight/ld9040.c index efd352be21ae..58f517fb7d40 100644 --- a/drivers/video/backlight/ld9040.c +++ b/drivers/video/backlight/ld9040.c @@ -707,7 +707,7 @@ static int ld9040_probe(struct spi_device *spi) struct backlight_device *bd = NULL; struct backlight_properties props; - lcd = kzalloc(sizeof(struct ld9040), GFP_KERNEL); + lcd = devm_kzalloc(&spi->dev, sizeof(struct ld9040), GFP_KERNEL); if (!lcd) return -ENOMEM; @@ -717,7 +717,7 @@ static int ld9040_probe(struct spi_device *spi) ret = spi_setup(spi); if (ret < 0) { dev_err(&spi->dev, "spi setup failed.\n"); - goto out_free_lcd; + return ret; } lcd->spi = spi; @@ -726,7 +726,7 @@ static int ld9040_probe(struct spi_device *spi) lcd->lcd_pd = spi->dev.platform_data; if (!lcd->lcd_pd) { dev_err(&spi->dev, "platform data is NULL.\n"); - goto out_free_lcd; + return -EFAULT; } mutex_init(&lcd->lock); @@ -734,13 +734,13 @@ static int ld9040_probe(struct spi_device *spi) ret = regulator_bulk_get(lcd->dev, ARRAY_SIZE(supplies), supplies); if (ret) { dev_err(lcd->dev, "Failed to get regulators: %d\n", ret); - goto out_free_lcd; + return ret; } ld = lcd_device_register("ld9040", &spi->dev, lcd, &ld9040_lcd_ops); if (IS_ERR(ld)) { ret = PTR_ERR(ld); - goto out_free_lcd; + goto out_free_regulator; } lcd->ld = ld; @@ -782,10 +782,9 @@ static int ld9040_probe(struct spi_device *spi) out_unregister_lcd: lcd_device_unregister(lcd->ld); -out_free_lcd: +out_free_regulator: regulator_bulk_free(ARRAY_SIZE(supplies), supplies); - kfree(lcd); return ret; } @@ -797,7 +796,6 @@ static int __devexit ld9040_remove(struct spi_device *spi) backlight_device_unregister(lcd->bd); lcd_device_unregister(lcd->ld); regulator_bulk_free(ARRAY_SIZE(supplies), supplies); - kfree(lcd); return 0; } @@ -846,7 +844,6 @@ static void ld9040_shutdown(struct spi_device *spi) static struct spi_driver ld9040_driver = { .driver = { .name = "ld9040", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, .probe = ld9040_probe, diff --git a/drivers/video/backlight/lm3533_bl.c b/drivers/video/backlight/lm3533_bl.c new file mode 100644 index 000000000000..bebeb63607db --- /dev/null +++ b/drivers/video/backlight/lm3533_bl.c @@ -0,0 +1,423 @@ +/* + * lm3533-bl.c -- LM3533 Backlight driver + * + * Copyright (C) 2011-2012 Texas Instruments + * + * Author: Johan Hovold <jhovold@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/backlight.h> +#include <linux/fb.h> +#include <linux/slab.h> + +#include <linux/mfd/lm3533.h> + + +#define LM3533_HVCTRLBANK_COUNT 2 +#define LM3533_BL_MAX_BRIGHTNESS 255 + +#define LM3533_REG_CTRLBANK_AB_BCONF 0x1a + + +struct lm3533_bl { + struct lm3533 *lm3533; + struct lm3533_ctrlbank cb; + struct backlight_device *bd; + int id; +}; + + +static inline int lm3533_bl_get_ctrlbank_id(struct lm3533_bl *bl) +{ + return bl->id; +} + +static int lm3533_bl_update_status(struct backlight_device *bd) +{ + struct lm3533_bl *bl = bl_get_data(bd); + int brightness = bd->props.brightness; + + if (bd->props.power != FB_BLANK_UNBLANK) + brightness = 0; + if (bd->props.fb_blank != FB_BLANK_UNBLANK) + brightness = 0; + + return lm3533_ctrlbank_set_brightness(&bl->cb, (u8)brightness); +} + +static int lm3533_bl_get_brightness(struct backlight_device *bd) +{ + struct lm3533_bl *bl = bl_get_data(bd); + u8 val; + int ret; + + ret = lm3533_ctrlbank_get_brightness(&bl->cb, &val); + if (ret) + return ret; + + return val; +} + +static const struct backlight_ops lm3533_bl_ops = { + .get_brightness = lm3533_bl_get_brightness, + .update_status = lm3533_bl_update_status, +}; + +static ssize_t show_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", bl->id); +} + +static ssize_t show_als_channel(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + unsigned channel = lm3533_bl_get_ctrlbank_id(bl); + + return scnprintf(buf, PAGE_SIZE, "%u\n", channel); +} + +static ssize_t show_als_en(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + int ctrlbank = lm3533_bl_get_ctrlbank_id(bl); + u8 val; + u8 mask; + bool enable; + int ret; + + ret = lm3533_read(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, &val); + if (ret) + return ret; + + mask = 1 << (2 * ctrlbank); + enable = val & mask; + + return scnprintf(buf, PAGE_SIZE, "%d\n", enable); +} + +static ssize_t store_als_en(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + int ctrlbank = lm3533_bl_get_ctrlbank_id(bl); + int enable; + u8 val; + u8 mask; + int ret; + + if (kstrtoint(buf, 0, &enable)) + return -EINVAL; + + mask = 1 << (2 * ctrlbank); + + if (enable) + val = mask; + else + val = 0; + + ret = lm3533_update(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, val, + mask); + if (ret) + return ret; + + return len; +} + +static ssize_t show_linear(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + u8 val; + u8 mask; + int linear; + int ret; + + ret = lm3533_read(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, &val); + if (ret) + return ret; + + mask = 1 << (2 * lm3533_bl_get_ctrlbank_id(bl) + 1); + + if (val & mask) + linear = 1; + else + linear = 0; + + return scnprintf(buf, PAGE_SIZE, "%x\n", linear); +} + +static ssize_t store_linear(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + unsigned long linear; + u8 mask; + u8 val; + int ret; + + if (kstrtoul(buf, 0, &linear)) + return -EINVAL; + + mask = 1 << (2 * lm3533_bl_get_ctrlbank_id(bl) + 1); + + if (linear) + val = mask; + else + val = 0; + + ret = lm3533_update(bl->lm3533, LM3533_REG_CTRLBANK_AB_BCONF, val, + mask); + if (ret) + return ret; + + return len; +} + +static ssize_t show_pwm(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + u8 val; + int ret; + + ret = lm3533_ctrlbank_get_pwm(&bl->cb, &val); + if (ret) + return ret; + + return scnprintf(buf, PAGE_SIZE, "%u\n", val); +} + +static ssize_t store_pwm(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lm3533_bl *bl = dev_get_drvdata(dev); + u8 val; + int ret; + + if (kstrtou8(buf, 0, &val)) + return -EINVAL; + + ret = lm3533_ctrlbank_set_pwm(&bl->cb, val); + if (ret) + return ret; + + return len; +} + +static LM3533_ATTR_RO(als_channel); +static LM3533_ATTR_RW(als_en); +static LM3533_ATTR_RO(id); +static LM3533_ATTR_RW(linear); +static LM3533_ATTR_RW(pwm); + +static struct attribute *lm3533_bl_attributes[] = { + &dev_attr_als_channel.attr, + &dev_attr_als_en.attr, + &dev_attr_id.attr, + &dev_attr_linear.attr, + &dev_attr_pwm.attr, + NULL, +}; + +static umode_t lm3533_bl_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct lm3533_bl *bl = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (attr == &dev_attr_als_channel.attr || + attr == &dev_attr_als_en.attr) { + if (!bl->lm3533->have_als) + mode = 0; + } + + return mode; +}; + +static struct attribute_group lm3533_bl_attribute_group = { + .is_visible = lm3533_bl_attr_is_visible, + .attrs = lm3533_bl_attributes +}; + +static int __devinit lm3533_bl_setup(struct lm3533_bl *bl, + struct lm3533_bl_platform_data *pdata) +{ + int ret; + + ret = lm3533_ctrlbank_set_max_current(&bl->cb, pdata->max_current); + if (ret) + return ret; + + return lm3533_ctrlbank_set_pwm(&bl->cb, pdata->pwm); +} + +static int __devinit lm3533_bl_probe(struct platform_device *pdev) +{ + struct lm3533 *lm3533; + struct lm3533_bl_platform_data *pdata; + struct lm3533_bl *bl; + struct backlight_device *bd; + struct backlight_properties props; + int ret; + + dev_dbg(&pdev->dev, "%s\n", __func__); + + lm3533 = dev_get_drvdata(pdev->dev.parent); + if (!lm3533) + return -EINVAL; + + pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "no platform data\n"); + return -EINVAL; + } + + if (pdev->id < 0 || pdev->id >= LM3533_HVCTRLBANK_COUNT) { + dev_err(&pdev->dev, "illegal backlight id %d\n", pdev->id); + return -EINVAL; + } + + bl = kzalloc(sizeof(*bl), GFP_KERNEL); + if (!bl) { + dev_err(&pdev->dev, + "failed to allocate memory for backlight\n"); + return -ENOMEM; + } + + bl->lm3533 = lm3533; + bl->id = pdev->id; + + bl->cb.lm3533 = lm3533; + bl->cb.id = lm3533_bl_get_ctrlbank_id(bl); + bl->cb.dev = NULL; /* until registered */ + + memset(&props, 0, sizeof(props)); + props.type = BACKLIGHT_RAW; + props.max_brightness = LM3533_BL_MAX_BRIGHTNESS; + props.brightness = pdata->default_brightness; + bd = backlight_device_register(pdata->name, pdev->dev.parent, bl, + &lm3533_bl_ops, &props); + if (IS_ERR(bd)) { + dev_err(&pdev->dev, "failed to register backlight device\n"); + ret = PTR_ERR(bd); + goto err_free; + } + + bl->bd = bd; + bl->cb.dev = &bl->bd->dev; + + platform_set_drvdata(pdev, bl); + + ret = sysfs_create_group(&bd->dev.kobj, &lm3533_bl_attribute_group); + if (ret < 0) { + dev_err(&pdev->dev, "failed to create sysfs attributes\n"); + goto err_unregister; + } + + backlight_update_status(bd); + + ret = lm3533_bl_setup(bl, pdata); + if (ret) + goto err_sysfs_remove; + + ret = lm3533_ctrlbank_enable(&bl->cb); + if (ret) + goto err_sysfs_remove; + + return 0; + +err_sysfs_remove: + sysfs_remove_group(&bd->dev.kobj, &lm3533_bl_attribute_group); +err_unregister: + backlight_device_unregister(bd); +err_free: + kfree(bl); + + return ret; +} + +static int __devexit lm3533_bl_remove(struct platform_device *pdev) +{ + struct lm3533_bl *bl = platform_get_drvdata(pdev); + struct backlight_device *bd = bl->bd; + + dev_dbg(&bd->dev, "%s\n", __func__); + + bd->props.power = FB_BLANK_POWERDOWN; + bd->props.brightness = 0; + + lm3533_ctrlbank_disable(&bl->cb); + sysfs_remove_group(&bd->dev.kobj, &lm3533_bl_attribute_group); + backlight_device_unregister(bd); + kfree(bl); + + return 0; +} + +#ifdef CONFIG_PM +static int lm3533_bl_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct lm3533_bl *bl = platform_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s\n", __func__); + + return lm3533_ctrlbank_disable(&bl->cb); +} + +static int lm3533_bl_resume(struct platform_device *pdev) +{ + struct lm3533_bl *bl = platform_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s\n", __func__); + + return lm3533_ctrlbank_enable(&bl->cb); +} +#else +#define lm3533_bl_suspend NULL +#define lm3533_bl_resume NULL +#endif + +static void lm3533_bl_shutdown(struct platform_device *pdev) +{ + struct lm3533_bl *bl = platform_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s\n", __func__); + + lm3533_ctrlbank_disable(&bl->cb); +} + +static struct platform_driver lm3533_bl_driver = { + .driver = { + .name = "lm3533-backlight", + .owner = THIS_MODULE, + }, + .probe = lm3533_bl_probe, + .remove = __devexit_p(lm3533_bl_remove), + .shutdown = lm3533_bl_shutdown, + .suspend = lm3533_bl_suspend, + .resume = lm3533_bl_resume, +}; +module_platform_driver(lm3533_bl_driver); + +MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>"); +MODULE_DESCRIPTION("LM3533 Backlight driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:lm3533-backlight"); diff --git a/drivers/video/backlight/lms283gf05.c b/drivers/video/backlight/lms283gf05.c index 4161f9e3982a..a9f2c36966f1 100644 --- a/drivers/video/backlight/lms283gf05.c +++ b/drivers/video/backlight/lms283gf05.c @@ -168,7 +168,8 @@ static int __devinit lms283gf05_probe(struct spi_device *spi) goto err; } - st = kzalloc(sizeof(struct lms283gf05_state), GFP_KERNEL); + st = devm_kzalloc(&spi->dev, sizeof(struct lms283gf05_state), + GFP_KERNEL); if (st == NULL) { dev_err(&spi->dev, "No memory for device state\n"); ret = -ENOMEM; @@ -178,7 +179,7 @@ static int __devinit lms283gf05_probe(struct spi_device *spi) ld = lcd_device_register("lms283gf05", &spi->dev, st, &lms_ops); if (IS_ERR(ld)) { ret = PTR_ERR(ld); - goto err2; + goto err; } st->spi = spi; @@ -193,8 +194,6 @@ static int __devinit lms283gf05_probe(struct spi_device *spi) return 0; -err2: - kfree(st); err: if (pdata != NULL) gpio_free(pdata->reset_gpio); @@ -212,8 +211,6 @@ static int __devexit lms283gf05_remove(struct spi_device *spi) if (pdata != NULL) gpio_free(pdata->reset_gpio); - kfree(st); - return 0; } diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c index 333949ff3265..6c0f1ac0d32a 100644 --- a/drivers/video/backlight/ltv350qv.c +++ b/drivers/video/backlight/ltv350qv.c @@ -232,23 +232,20 @@ static int __devinit ltv350qv_probe(struct spi_device *spi) struct lcd_device *ld; int ret; - lcd = kzalloc(sizeof(struct ltv350qv), GFP_KERNEL); + lcd = devm_kzalloc(&spi->dev, sizeof(struct ltv350qv), GFP_KERNEL); if (!lcd) return -ENOMEM; lcd->spi = spi; lcd->power = FB_BLANK_POWERDOWN; - lcd->buffer = kzalloc(8, GFP_KERNEL); - if (!lcd->buffer) { - ret = -ENOMEM; - goto out_free_lcd; - } + lcd->buffer = devm_kzalloc(&spi->dev, 8, GFP_KERNEL); + if (!lcd->buffer) + return -ENOMEM; ld = lcd_device_register("ltv350qv", &spi->dev, lcd, <v_ops); - if (IS_ERR(ld)) { - ret = PTR_ERR(ld); - goto out_free_buffer; - } + if (IS_ERR(ld)) + return PTR_ERR(ld); + lcd->ld = ld; ret = ltv350qv_power(lcd, FB_BLANK_UNBLANK); @@ -261,10 +258,6 @@ static int __devinit ltv350qv_probe(struct spi_device *spi) out_unregister: lcd_device_unregister(ld); -out_free_buffer: - kfree(lcd->buffer); -out_free_lcd: - kfree(lcd); return ret; } @@ -274,8 +267,6 @@ static int __devexit ltv350qv_remove(struct spi_device *spi) ltv350qv_power(lcd, FB_BLANK_POWERDOWN); lcd_device_unregister(lcd->ld); - kfree(lcd->buffer); - kfree(lcd); return 0; } @@ -310,7 +301,6 @@ static void ltv350qv_shutdown(struct spi_device *spi) static struct spi_driver ltv350qv_driver = { .driver = { .name = "ltv350qv", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c index 0175bfb08a1c..bfdc5fbeaa11 100644 --- a/drivers/video/backlight/omap1_bl.c +++ b/drivers/video/backlight/omap1_bl.c @@ -18,6 +18,8 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -168,7 +170,7 @@ static int omapbl_probe(struct platform_device *pdev) dev->props.brightness = pdata->default_intensity; omapbl_update_status(dev); - printk(KERN_INFO "OMAP LCD backlight initialised\n"); + pr_info("OMAP LCD backlight initialised\n"); return 0; } diff --git a/drivers/video/backlight/pcf50633-backlight.c b/drivers/video/backlight/pcf50633-backlight.c index c65853cb9740..c092159f4383 100644 --- a/drivers/video/backlight/pcf50633-backlight.c +++ b/drivers/video/backlight/pcf50633-backlight.c @@ -111,6 +111,7 @@ static int __devinit pcf50633_bl_probe(struct platform_device *pdev) if (!pcf_bl) return -ENOMEM; + memset(&bl_props, 0, sizeof(bl_props)); bl_props.type = BACKLIGHT_RAW; bl_props.max_brightness = 0x3f; bl_props.power = FB_BLANK_UNBLANK; diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c index 6af183d6465e..69b35f02929e 100644 --- a/drivers/video/backlight/progear_bl.c +++ b/drivers/video/backlight/progear_bl.c @@ -15,6 +15,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -68,13 +70,13 @@ static int progearbl_probe(struct platform_device *pdev) pmu_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, NULL); if (!pmu_dev) { - printk("ALI M7101 PMU not found.\n"); + pr_err("ALI M7101 PMU not found.\n"); return -ENODEV; } sb_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); if (!sb_dev) { - printk("ALI 1533 SB not found.\n"); + pr_err("ALI 1533 SB not found.\n"); ret = -ENODEV; goto put_pmu; } diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c index e264f55b2574..6437ae474cf2 100644 --- a/drivers/video/backlight/s6e63m0.c +++ b/drivers/video/backlight/s6e63m0.c @@ -741,7 +741,7 @@ static int __devinit s6e63m0_probe(struct spi_device *spi) struct backlight_device *bd = NULL; struct backlight_properties props; - lcd = kzalloc(sizeof(struct s6e63m0), GFP_KERNEL); + lcd = devm_kzalloc(&spi->dev, sizeof(struct s6e63m0), GFP_KERNEL); if (!lcd) return -ENOMEM; @@ -751,7 +751,7 @@ static int __devinit s6e63m0_probe(struct spi_device *spi) ret = spi_setup(spi); if (ret < 0) { dev_err(&spi->dev, "spi setup failed.\n"); - goto out_free_lcd; + return ret; } lcd->spi = spi; @@ -760,14 +760,12 @@ static int __devinit s6e63m0_probe(struct spi_device *spi) lcd->lcd_pd = (struct lcd_platform_data *)spi->dev.platform_data; if (!lcd->lcd_pd) { dev_err(&spi->dev, "platform data is NULL.\n"); - goto out_free_lcd; + return -EFAULT; } ld = lcd_device_register("s6e63m0", &spi->dev, lcd, &s6e63m0_lcd_ops); - if (IS_ERR(ld)) { - ret = PTR_ERR(ld); - goto out_free_lcd; - } + if (IS_ERR(ld)) + return PTR_ERR(ld); lcd->ld = ld; @@ -824,8 +822,6 @@ static int __devinit s6e63m0_probe(struct spi_device *spi) out_lcd_unregister: lcd_device_unregister(ld); -out_free_lcd: - kfree(lcd); return ret; } @@ -838,7 +834,6 @@ static int __devexit s6e63m0_remove(struct spi_device *spi) device_remove_file(&spi->dev, &dev_attr_gamma_mode); backlight_device_unregister(lcd->bd); lcd_device_unregister(lcd->ld); - kfree(lcd); return 0; } @@ -899,7 +894,6 @@ static void s6e63m0_shutdown(struct spi_device *spi) static struct spi_driver s6e63m0_driver = { .driver = { .name = "s6e63m0", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, .probe = s6e63m0_probe, diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c index 2368b8e5f89e..02444d042cd5 100644 --- a/drivers/video/backlight/tdo24m.c +++ b/drivers/video/backlight/tdo24m.c @@ -349,7 +349,7 @@ static int __devinit tdo24m_probe(struct spi_device *spi) if (err) return err; - lcd = kzalloc(sizeof(struct tdo24m), GFP_KERNEL); + lcd = devm_kzalloc(&spi->dev, sizeof(struct tdo24m), GFP_KERNEL); if (!lcd) return -ENOMEM; @@ -357,11 +357,9 @@ static int __devinit tdo24m_probe(struct spi_device *spi) lcd->power = FB_BLANK_POWERDOWN; lcd->mode = MODE_VGA; /* default to VGA */ - lcd->buf = kmalloc(TDO24M_SPI_BUFF_SIZE, GFP_KERNEL); - if (lcd->buf == NULL) { - kfree(lcd); + lcd->buf = devm_kzalloc(&spi->dev, TDO24M_SPI_BUFF_SIZE, GFP_KERNEL); + if (lcd->buf == NULL) return -ENOMEM; - } m = &lcd->msg; x = &lcd->xfer; @@ -383,15 +381,13 @@ static int __devinit tdo24m_probe(struct spi_device *spi) break; default: dev_err(&spi->dev, "Unsupported model"); - goto out_free; + return -EINVAL; } lcd->lcd_dev = lcd_device_register("tdo24m", &spi->dev, lcd, &tdo24m_ops); - if (IS_ERR(lcd->lcd_dev)) { - err = PTR_ERR(lcd->lcd_dev); - goto out_free; - } + if (IS_ERR(lcd->lcd_dev)) + return PTR_ERR(lcd->lcd_dev); dev_set_drvdata(&spi->dev, lcd); err = tdo24m_power(lcd, FB_BLANK_UNBLANK); @@ -402,9 +398,6 @@ static int __devinit tdo24m_probe(struct spi_device *spi) out_unregister: lcd_device_unregister(lcd->lcd_dev); -out_free: - kfree(lcd->buf); - kfree(lcd); return err; } @@ -414,8 +407,6 @@ static int __devexit tdo24m_remove(struct spi_device *spi) tdo24m_power(lcd, FB_BLANK_POWERDOWN); lcd_device_unregister(lcd->lcd_dev); - kfree(lcd->buf); - kfree(lcd); return 0; } diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c index 2b241abced43..0d54e607e82d 100644 --- a/drivers/video/backlight/tosa_bl.c +++ b/drivers/video/backlight/tosa_bl.c @@ -82,8 +82,11 @@ static int __devinit tosa_bl_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct backlight_properties props; - struct tosa_bl_data *data = kzalloc(sizeof(struct tosa_bl_data), GFP_KERNEL); + struct tosa_bl_data *data; int ret = 0; + + data = devm_kzalloc(&client->dev, sizeof(struct tosa_bl_data), + GFP_KERNEL); if (!data) return -ENOMEM; @@ -92,7 +95,7 @@ static int __devinit tosa_bl_probe(struct i2c_client *client, ret = gpio_request(TOSA_GPIO_BL_C20MA, "backlight"); if (ret) { dev_dbg(&data->bl->dev, "Unable to request gpio!\n"); - goto err_gpio_bl; + return ret; } ret = gpio_direction_output(TOSA_GPIO_BL_C20MA, 0); if (ret) @@ -122,8 +125,6 @@ err_reg: data->bl = NULL; err_gpio_dir: gpio_free(TOSA_GPIO_BL_C20MA); -err_gpio_bl: - kfree(data); return ret; } @@ -136,8 +137,6 @@ static int __devexit tosa_bl_remove(struct i2c_client *client) gpio_free(TOSA_GPIO_BL_C20MA); - kfree(data); - return 0; } diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 2231aec23918..47823b8efff0 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -174,7 +174,8 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi) int ret; struct tosa_lcd_data *data; - data = kzalloc(sizeof(struct tosa_lcd_data), GFP_KERNEL); + data = devm_kzalloc(&spi->dev, sizeof(struct tosa_lcd_data), + GFP_KERNEL); if (!data) return -ENOMEM; @@ -187,7 +188,7 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi) ret = spi_setup(spi); if (ret < 0) - goto err_spi; + return ret; data->spi = spi; dev_set_drvdata(&spi->dev, data); @@ -224,8 +225,6 @@ err_gpio_dir: gpio_free(TOSA_GPIO_TG_ON); err_gpio_tg: dev_set_drvdata(&spi->dev, NULL); -err_spi: - kfree(data); return ret; } @@ -242,7 +241,6 @@ static int __devexit tosa_lcd_remove(struct spi_device *spi) gpio_free(TOSA_GPIO_TG_ON); dev_set_drvdata(&spi->dev, NULL); - kfree(data); return 0; } diff --git a/drivers/video/backlight/wm831x_bl.c b/drivers/video/backlight/wm831x_bl.c index 5d365deb5f82..9e5517a3a52b 100644 --- a/drivers/video/backlight/wm831x_bl.c +++ b/drivers/video/backlight/wm831x_bl.c @@ -194,6 +194,7 @@ static int wm831x_backlight_probe(struct platform_device *pdev) data->current_brightness = 0; data->isink_reg = isink_reg; + memset(&props, 0, sizeof(props)); props.type = BACKLIGHT_RAW; props.max_brightness = max_isel; bl = backlight_device_register("wm831x", &pdev->dev, data, diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index c6ce416ab587..0dff12a1daef 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1046,20 +1046,29 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) int fb_blank(struct fb_info *info, int blank) { - int ret = -EINVAL; + struct fb_event event; + int ret = -EINVAL, early_ret; if (blank > FB_BLANK_POWERDOWN) blank = FB_BLANK_POWERDOWN; + event.info = info; + event.data = ␣ + + early_ret = fb_notifier_call_chain(FB_EARLY_EVENT_BLANK, &event); + if (info->fbops->fb_blank) ret = info->fbops->fb_blank(blank, info); - if (!ret) { - struct fb_event event; - - event.info = info; - event.data = ␣ + if (!ret) fb_notifier_call_chain(FB_EVENT_BLANK, &event); + else { + /* + * if fb_blank is failed then revert effects of + * the early blank event. + */ + if (!early_ret) + fb_notifier_call_chain(FB_R_EARLY_EVENT_BLANK, &event); } return ret; diff --git a/drivers/video/omap2/displays/panel-acx565akm.c b/drivers/video/omap2/displays/panel-acx565akm.c index d26f37ac69d8..74e7cf078505 100644 --- a/drivers/video/omap2/displays/panel-acx565akm.c +++ b/drivers/video/omap2/displays/panel-acx565akm.c @@ -532,6 +532,7 @@ static int acx_panel_probe(struct omap_dss_device *dssdev) /*------- Backlight control --------*/ + memset(&props, 0, sizeof(props)); props.fb_blank = FB_BLANK_UNBLANK; props.power = FB_BLANK_UNBLANK; props.type = BACKLIGHT_RAW; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 37268c5bb98b..1b35d6bd06b0 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -292,7 +292,6 @@ static const struct inode_operations bad_inode_ops = .getxattr = bad_inode_getxattr, .listxattr = bad_inode_listxattr, .removexattr = bad_inode_removexattr, - /* truncate_range returns void */ }; diff --git a/fs/proc/base.c b/fs/proc/base.c index d2d3108a611c..d7d711876b6a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -411,12 +411,13 @@ static const struct file_operations proc_lstats_operations = { static int proc_oom_score(struct task_struct *task, char *buffer) { + unsigned long totalpages = totalram_pages + total_swap_pages; unsigned long points = 0; read_lock(&tasklist_lock); if (pid_alive(task)) - points = oom_badness(task, NULL, NULL, - totalram_pages + total_swap_pages); + points = oom_badness(task, NULL, NULL, totalpages) * + 1000 / totalpages; read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 1030a716d155..7faaf2acc570 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -784,7 +784,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2768f188f55..6f2b45a9b6bc 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifndef pmd_read_atomic +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + /* + * Depend on compiler for an atomic pmd read. NOTE: this is + * only going to work, if the pmdval_t isn't larger than + * an unsigned long. + */ + return *pmdp; +} +#endif + /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and @@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd) * undefined so behaving like if the pmd was none is safe (because it * can return none anyway). The compiler level barrier() is critically * important to compute the two checks atomically on the same pmdval. + * + * For 32bit kernels with a 64bit large pmd_t this automatically takes + * care of reading the pmd atomically to avoid SMP race conditions + * against pmd_populate() when the mmap_sem is hold for reading by the + * caller (a special atomic read not done by "gcc" as in the generic + * version above, is also needed when THP is disabled because the page + * fault can populate the pmd from under us). */ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) { - /* depend on compiler for an atomic pmd read */ - pmd_t pmdval = *pmd; + pmd_t pmdval = pmd_read_atomic(pmd); /* * The barrier will stabilize the pmdval in a register or on * the stack so that it will stop changing under the code. diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4cd59b95858f..7185b8f15ced 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -225,6 +225,7 @@ header-y += kd.h header-y += kdev_t.h header-y += kernel.h header-y += kernelcapi.h +header-y += kernel-page-flags.h header-y += keyboard.h header-y += keyctl.h header-y += l2tp.h diff --git a/include/linux/apple_bl.h b/include/linux/apple_bl.h index 47bedc0eee69..0a95e730fcea 100644 --- a/include/linux/apple_bl.h +++ b/include/linux/apple_bl.h @@ -5,7 +5,7 @@ #ifndef _LINUX_APPLE_BL_H #define _LINUX_APPLE_BL_H -#ifdef CONFIG_BACKLIGHT_APPLE +#if defined(CONFIG_BACKLIGHT_APPLE) || defined(CONFIG_BACKLIGHT_APPLE_MODULE) extern int apple_bl_register(void); extern void apple_bl_unregister(void); diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1a0cd270bb7a..324fe08ea3b1 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -135,9 +135,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, int flags); -extern void *alloc_bootmem_section(unsigned long size, - unsigned long section_nr); - #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP extern void *alloc_remap(int nid, unsigned long size); #else diff --git a/include/linux/bug.h b/include/linux/bug.h index 72961c39576a..aaac4bba6f5c 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -30,6 +30,13 @@ struct pt_regs; #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +/* + * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the + * expression but avoids the generation of any code, even if that expression + * has side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + /** * BUILD_BUG_ON - break compile if a condition is true. * @condition: the condition which the compiler should know is false. diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 51a90b7f2d60..e988037abd2a 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -1,6 +1,8 @@ #ifndef _LINUX_COMPACTION_H #define _LINUX_COMPACTION_H +#include <linux/node.h> + /* Return values for compact_zone() and try_to_compact_pages() */ /* compaction didn't start as it was not possible or direct reclaim was more suitable */ #define COMPACT_SKIPPED 0 @@ -11,6 +13,23 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 +/* + * compaction supports three modes + * + * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans + * MIGRATE_MOVABLE pageblocks as migration sources and targets. + * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans + * MIGRATE_MOVABLE pageblocks as migration sources. + * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration + * targets and convers them to MIGRATE_MOVABLE if possible + * COMPACT_SYNC uses synchronous migration and scans all pageblocks + */ +enum compact_mode { + COMPACT_ASYNC_MOVABLE, + COMPACT_ASYNC_UNMOVABLE, + COMPACT_SYNC, +}; + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, diff --git a/include/linux/fb.h b/include/linux/fb.h index d31cb682e173..a3229d7ab9f2 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -554,6 +554,10 @@ struct fb_cursor_user { #define FB_EVENT_FB_UNBIND 0x0E /* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */ #define FB_EVENT_REMAP_ALL_CONSOLE 0x0F +/* A hardware display blank early change occured */ +#define FB_EARLY_EVENT_BLANK 0x10 +/* A hardware display blank revert early change occured */ +#define FB_R_EARLY_EVENT_BLANK 0x11 struct fb_event { struct fb_info *info; diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc1a9630948..038076b27ea4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1681,7 +1681,6 @@ struct inode_operations { ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); - void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); } ____cacheline_aligned; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index c8af7a2efb52..4c59b1131187 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -59,6 +59,8 @@ extern pmd_t *page_check_address_pmd(struct page *page, #define HPAGE_PMD_MASK HPAGE_MASK #define HPAGE_PMD_SIZE HPAGE_SIZE +extern bool is_vma_temporary_stack(struct vm_area_struct *vma); + #define transparent_hugepage_enabled(__vma) \ ((transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 387571959dd9..6883e197acb9 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -36,6 +36,7 @@ const char *kallsyms_lookup(unsigned long addr, /* Look up a kernel symbol and return it in a text buffer. */ extern int sprint_symbol(char *buffer, unsigned long address); +extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); /* Look up a kernel symbol and print it to the kernel messages. */ @@ -80,6 +81,12 @@ static inline int sprint_symbol(char *buffer, unsigned long addr) return 0; } +static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) +{ + *buffer = '\0'; + return 0; +} + static inline int sprint_backtrace(char *buffer, unsigned long addr) { *buffer = '\0'; diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index 26a65711676f..a1bdf6966357 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -32,6 +32,8 @@ #define KPF_KSM 21 #define KPF_THP 22 +#ifdef __KERNEL__ + /* kernel hacking assistances * WARNING: subject to change, never rely on them! */ @@ -44,4 +46,6 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 +#endif /* __KERNEL__ */ + #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 8877123f2d6e..e00c3b0ebc6b 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -40,6 +40,16 @@ struct lcd_ops { /* Get the LCD panel power status (0: full on, 1..3: controller power on, flat panel power off, 4: full off), see FB_BLANK_XXX */ int (*get_power)(struct lcd_device *); + /* + * Enable or disable power to the LCD(0: on; 4: off, see FB_BLANK_XXX) + * and this callback would be called proir to fb driver's callback. + * + * P.S. note that if early_set_power is not NULL then early fb notifier + * would be registered. + */ + int (*early_set_power)(struct lcd_device *, int power); + /* revert the effects of the early blank event. */ + int (*r_early_set_power)(struct lcd_device *, int power); /* Enable or disable power to the LCD (0: on; 4: off, see FB_BLANK_XXX) */ int (*set_power)(struct lcd_device *, int power); /* Get the current contrast setting (0-max_contrast) */ diff --git a/include/linux/led-lm3530.h b/include/linux/led-lm3530.h index eeae6e742471..4b133479d6ea 100644 --- a/include/linux/led-lm3530.h +++ b/include/linux/led-lm3530.h @@ -92,7 +92,7 @@ struct lm3530_pwm_data { * @als2_resistor_sel: internal resistance from ALS2 input to ground * @als_vmin: als input voltage calibrated for max brightness in mV * @als_vmax: als input voltage calibrated for min brightness in mV - * @brt_val: brightness value (0-255) + * @brt_val: brightness value (0-127) * @pwm_data: PWM control functions (only valid when the mode is PWM) */ struct lm3530_platform_data { diff --git a/include/linux/leds.h b/include/linux/leds.h index 5884def15a24..39eee41d8c6f 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -73,6 +73,8 @@ struct led_classdev { struct led_trigger *trigger; struct list_head trig_list; void *trigger_data; + /* true if activated - deactivate routine uses it to do cleanup */ + bool activated; #endif }; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f94efd2f6c27..83e7ba90d6e5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -63,12 +63,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); -struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *, - enum lru_list); -void mem_cgroup_lru_del_list(struct page *, enum lru_list); -void mem_cgroup_lru_del(struct page *); -struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *, - enum lru_list, enum lru_list); +struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); /* For coalescing uncharge for reducing memcg' overhead*/ extern void mem_cgroup_uncharge_start(void); @@ -79,6 +74,8 @@ extern void mem_cgroup_uncharge_cache_page(struct page *page); extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order); +bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, + struct mem_cgroup *memcg); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); @@ -92,10 +89,13 @@ static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) { struct mem_cgroup *memcg; + int match; + rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner)); + match = __mem_cgroup_same_or_subtree(cgroup, memcg); rcu_read_unlock(); - return cgroup == memcg; + return match; } extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); @@ -114,17 +114,11 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); /* * For memory reclaim. */ -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, - struct zone *zone); -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, - struct zone *zone); +int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); +int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); -unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, - int nid, int zid, unsigned int lrumask); -struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, - struct zone *zone); -struct zone_reclaim_stat* -mem_cgroup_get_reclaim_stat_from_page(struct page *page); +unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); +void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); extern void mem_cgroup_replace_page_cache(struct page *oldpage, @@ -251,25 +245,8 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, return &zone->lruvec; } -static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, - struct page *page, - enum lru_list lru) -{ - return &zone->lruvec; -} - -static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) -{ -} - -static inline void mem_cgroup_lru_del(struct page *page) -{ -} - -static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, - struct page *page, - enum lru_list from, - enum lru_list to) +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, + struct zone *zone) { return &zone->lruvec; } @@ -333,35 +310,27 @@ static inline bool mem_cgroup_disabled(void) } static inline int -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { return 1; } static inline int -mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) { return 1; } static inline unsigned long -mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, - unsigned int lru_mask) +mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { return 0; } - -static inline struct zone_reclaim_stat* -mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) -{ - return NULL; -} - -static inline struct zone_reclaim_stat* -mem_cgroup_get_reclaim_stat_from_page(struct page *page) +static inline void +mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, + int increment) { - return NULL; } static inline void diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 7c727a90d70d..4aa42732e47f 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -225,8 +225,8 @@ static inline void check_highest_zone(enum zone_type k) policy_zone = k; } -int do_migrate_pages(struct mm_struct *mm, - const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); +int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, + const nodemask_t *to, int flags); #ifdef CONFIG_TMPFS @@ -354,9 +354,8 @@ static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk, return false; } -static inline int do_migrate_pages(struct mm_struct *mm, - const nodemask_t *from_nodes, - const nodemask_t *to_nodes, int flags) +static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, + const nodemask_t *to, int flags) { return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d5c37f24c63..ce26716238c3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -321,6 +321,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline void compound_lock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(PageSlab(page)); bit_spin_lock(PG_compound_lock, &page->flags); #endif } @@ -328,6 +329,7 @@ static inline void compound_lock(struct page *page) static inline void compound_unlock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(PageSlab(page)); bit_spin_unlock(PG_compound_lock, &page->flags); #endif } @@ -871,8 +873,6 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); -int shmem_lock(struct file *file, int lock, struct user_struct *user); -struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); int shmem_zero_setup(struct vm_area_struct *); extern int can_do_mlock(void); @@ -951,11 +951,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); extern int vmtruncate(struct inode *inode, loff_t offset); -extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); - int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 227fd3e9a9c9..1397ccf81e91 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -21,22 +21,22 @@ static inline int page_is_file_cache(struct page *page) return !PageSwapBacked(page); } -static inline void -add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru) +static __always_inline void add_page_to_lru_list(struct page *page, + struct lruvec *lruvec, enum lru_list lru) { - struct lruvec *lruvec; - - lruvec = mem_cgroup_lru_add_list(zone, page, lru); + int nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); list_add(&page->lru, &lruvec->lists[lru]); - __mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page)); + __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages); } -static inline void -del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru) +static __always_inline void del_page_from_lru_list(struct page *page, + struct lruvec *lruvec, enum lru_list lru) { - mem_cgroup_lru_del_list(page, lru); + int nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_del(&page->lru); - __mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page)); + __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages); } /** @@ -61,7 +61,7 @@ static inline enum lru_list page_lru_base_type(struct page *page) * Returns the LRU list a page was on, as an index into the array of LRU * lists; and clears its Unevictable or Active flags, ready for freeing. */ -static inline enum lru_list page_off_lru(struct page *page) +static __always_inline enum lru_list page_off_lru(struct page *page) { enum lru_list lru; @@ -85,7 +85,7 @@ static inline enum lru_list page_off_lru(struct page *page) * Returns the LRU list a page should be on, as an index * into the array of LRU lists. */ -static inline enum lru_list page_lru(struct page *page) +static __always_inline enum lru_list page_lru(struct page *page) { enum lru_list lru; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 26574c726121..dad95bdd06d7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -345,17 +345,6 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Swap token stuff */ - /* - * Last value of global fault stamp as seen by this process. - * In other words, this value gives an indication of how long - * it has been since this task got the token. - * Look at mm/thrash.c - */ - unsigned int faultstamp; - unsigned int token_priority; - unsigned int last_interval; - unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index c04ecfe03f7f..580bd587d916 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -4,7 +4,7 @@ #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) #else -#define VM_BUG_ON(cond) do { (void)(cond); } while (0) +#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4871e31ae277..2427706f78b4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -185,8 +185,25 @@ static inline int is_unevictable_lru(enum lru_list lru) return (lru == LRU_UNEVICTABLE); } +struct zone_reclaim_stat { + /* + * The pageout code in vmscan.c keeps track of how many of the + * mem/swap backed and file backed pages are refeferenced. + * The higher the rotated/scanned ratio, the more valuable + * that cache is. + * + * The anon LRU stats live in [0], file LRU stats in [1] + */ + unsigned long recent_rotated[2]; + unsigned long recent_scanned[2]; +}; + struct lruvec { struct list_head lists[NR_LRU_LISTS]; + struct zone_reclaim_stat reclaim_stat; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR + struct zone *zone; +#endif }; /* Mask used at gathering information at once (see memcontrol.c) */ @@ -195,16 +212,12 @@ struct lruvec { #define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON) #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) -/* Isolate inactive pages */ -#define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1) -/* Isolate active pages */ -#define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2) /* Isolate clean file */ -#define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) +#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1) /* Isolate unmapped file */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) +#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ -#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) +#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* LRU Isolation modes. */ typedef unsigned __bitwise__ isolate_mode_t; @@ -313,19 +326,6 @@ enum zone_type { #error ZONES_SHIFT -- too many zones configured adjust calculation #endif -struct zone_reclaim_stat { - /* - * The pageout code in vmscan.c keeps track of how many of the - * mem/swap backed and file backed pages are refeferenced. - * The higher the rotated/scanned ratio, the more valuable - * that cache is. - * - * The anon LRU stats live in [0], file LRU stats in [1] - */ - unsigned long recent_rotated[2]; - unsigned long recent_scanned[2]; -}; - struct zone { /* Fields commonly accessed by the page allocator */ @@ -407,8 +407,6 @@ struct zone { spinlock_t lru_lock; struct lruvec lruvec; - struct zone_reclaim_stat reclaim_stat; - unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ @@ -734,6 +732,17 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size, enum memmap_context context); +extern void lruvec_init(struct lruvec *lruvec, struct zone *zone); + +static inline struct zone *lruvec_zone(struct lruvec *lruvec) +{ +#ifdef CONFIG_CGROUP_MEM_RES_CTLR + return lruvec->zone; +#else + return container_of(lruvec, struct zone, lruvec); +#endif +} + #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); #else diff --git a/include/linux/oom.h b/include/linux/oom.h index 3d7647536b03..e4c29bc72e70 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -43,8 +43,9 @@ enum oom_constraint { extern void compare_swap_oom_score_adj(int old_val, int new_val); extern int test_set_oom_score_adj(int new_val); -extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, - const nodemask_t *nodemask, unsigned long totalpages); +extern unsigned long oom_badness(struct task_struct *p, + struct mem_cgroup *memcg, const nodemask_t *nodemask, + unsigned long totalpages); extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index efa26b4da8d2..7cfad3bbb0cc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -460,11 +460,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) */ static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { - int ret; + int ret = 0; char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return 0; + return ret; /* * Writing zeroes into userspace here is OK, because we know that if @@ -489,11 +489,11 @@ static inline int fault_in_multipages_readable(const char __user *uaddr, int size) { volatile char c; - int ret; + int ret = 0; const char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return 0; + return ret; while (uaddr <= end) { ret = __get_user(c, uaddr); diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index fb201896a8b0..7d7fbe2ef782 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -119,7 +119,7 @@ int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val, bool force); int __must_check res_counter_charge(struct res_counter *counter, unsigned long val, struct res_counter **limit_fail_at); -int __must_check res_counter_charge_nofail(struct res_counter *counter, +int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, struct res_counter **limit_fail_at); /* @@ -135,6 +135,9 @@ int __must_check res_counter_charge_nofail(struct res_counter *counter, void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); void res_counter_uncharge(struct res_counter *counter, unsigned long val); +void res_counter_uncharge_until(struct res_counter *counter, + struct res_counter *top, + unsigned long val); /** * res_counter_margin - calculate chargeable space of a counter * @cnt: the counter diff --git a/include/linux/rmap.h b/include/linux/rmap.h index fd07c4542cee..3fce545df394 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -173,8 +173,6 @@ enum ttu_flags { }; #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) -bool is_vma_temporary_stack(struct vm_area_struct *vma); - int try_to_unmap(struct page *, enum ttu_flags flags); int try_to_unmap_one(struct page *, struct vm_area_struct *, unsigned long address, enum ttu_flags flags); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index fcabfb4873c8..f071b3922c67 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -91,6 +91,9 @@ struct rtc_pll_info { #define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */ #define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ +#define RTC_VL_READ _IOR('p', 0x13, int) /* Voltage low detector */ +#define RTC_VL_CLR _IO('p', 0x14) /* Clear voltage low information */ + /* interrupt flags */ #define RTC_IRQF 0x80 /* Any of the following is active */ #define RTC_PF 0x40 /* Periodic interrupt */ diff --git a/include/linux/rtc/ds1307.h b/include/linux/rtc/ds1307.h new file mode 100644 index 000000000000..291b1c490367 --- /dev/null +++ b/include/linux/rtc/ds1307.h @@ -0,0 +1,22 @@ +/* + * ds1307.h - platform_data for the ds1307 (and variants) rtc driver + * (C) Copyright 2012 by Wolfram Sang, Pengutronix e.K. + * same license as the driver + */ + +#ifndef _LINUX_DS1307_H +#define _LINUX_DS1307_H + +#include <linux/types.h> + +#define DS1307_TRICKLE_CHARGER_250_OHM 0x01 +#define DS1307_TRICKLE_CHARGER_2K_OHM 0x02 +#define DS1307_TRICKLE_CHARGER_4K_OHM 0x03 +#define DS1307_TRICKLE_CHARGER_NO_DIODE 0x04 +#define DS1307_TRICKLE_CHARGER_DIODE 0x08 + +struct ds1307_platform_data { + u8 trickle_charger_setup; +}; + +#endif /* _LINUX_DS1307_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index b1fd5c7925fe..b6661933e252 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -221,8 +221,8 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); -extern void lru_add_page_tail(struct zone* zone, - struct page *page, struct page *page_tail); +extern void lru_add_page_tail(struct page *page, struct page *page_tail, + struct lruvec *lruvec); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); @@ -251,7 +251,7 @@ static inline void lru_cache_add_file(struct page *page) /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file); +extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap); extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, @@ -351,31 +351,14 @@ extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); +extern int page_swapcount(struct page *); extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -/* linux/mm/thrash.c */ -extern struct mm_struct *swap_token_mm; -extern void grab_swap_token(struct mm_struct *); -extern void __put_swap_token(struct mm_struct *); -extern void disable_swap_token(struct mem_cgroup *memcg); - -static inline int has_swap_token(struct mm_struct *mm) -{ - return (mm == swap_token_mm); -} - -static inline void put_swap_token(struct mm_struct *mm) -{ - if (has_swap_token(mm)) - __put_swap_token(mm); -} - #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); -extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep); #else static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) @@ -462,6 +445,11 @@ static inline void delete_from_swap_cache(struct page *page) { } +static inline int page_swapcount(struct page *page) +{ + return 0; +} + #define reuse_swap_page(page) (page_mapcount(page) == 1) static inline int try_to_free_swap(struct page *page) @@ -476,37 +464,11 @@ static inline swp_entry_t get_swap_page(void) return entry; } -/* linux/mm/thrash.c */ -static inline void put_swap_token(struct mm_struct *mm) -{ -} - -static inline void grab_swap_token(struct mm_struct *mm) -{ -} - -static inline int has_swap_token(struct mm_struct *mm) -{ - return 0; -} - -static inline void disable_swap_token(struct mem_cgroup *memcg) -{ -} - static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) { } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -static inline int -mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) -{ - return 0; -} -#endif - #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index d89f0582b6b6..4a4521699563 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -46,6 +46,7 @@ #include <linux/list_nulls.h> #include <linux/timer.h> #include <linux/cache.h> +#include <linux/bitops.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/skbuff.h> /* struct sk_buff */ @@ -921,12 +922,23 @@ struct proto { #endif }; +/* + * Bits in struct cg_proto.flags + */ +enum cg_proto_flags { + /* Currently active and new sockets should be assigned to cgroups */ + MEMCG_SOCK_ACTIVE, + /* It was ever activated; we must disarm static keys on destruction */ + MEMCG_SOCK_ACTIVATED, +}; + struct cg_proto { void (*enter_memory_pressure)(struct sock *sk); struct res_counter *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ int *memory_pressure; long *sysctl_mem; + unsigned long flags; /* * memcg field is used to find which memcg we belong directly * Each memcg struct can hold more than one cg_proto, so container_of @@ -942,6 +954,16 @@ struct cg_proto { extern int proto_register(struct proto *prot, int alloc_slab); extern void proto_unregister(struct proto *prot); +static inline bool memcg_proto_active(struct cg_proto *cg_proto) +{ + return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); +} + +static inline bool memcg_proto_activated(struct cg_proto *cg_proto) +{ + return test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags); +} + #ifdef SOCK_REFCNT_DEBUG static inline void sk_refcnt_debug_inc(struct sock *sk) { diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index f64560e204bc..bab3b87e4064 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -13,7 +13,7 @@ #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u #define RECLAIM_WB_MIXED 0x0010u -#define RECLAIM_WB_SYNC 0x0004u +#define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */ #define RECLAIM_WB_ASYNC 0x0008u #define show_reclaim_flags(flags) \ @@ -25,15 +25,15 @@ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" -#define trace_reclaim_flags(page, sync) ( \ +#define trace_reclaim_flags(page) ( \ (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ - (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ + (RECLAIM_WB_ASYNC) \ ) -#define trace_shrink_flags(file, sync) ( \ - (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_MIXED : \ - (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ - (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ +#define trace_shrink_flags(file) \ + ( \ + (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ + (RECLAIM_WB_ASYNC) \ ) TRACE_EVENT(mm_vmscan_kswapd_sleep, @@ -263,22 +263,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_taken, - unsigned long nr_lumpy_taken, - unsigned long nr_lumpy_dirty, - unsigned long nr_lumpy_failed, isolate_mode_t isolate_mode, int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file), + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file), TP_STRUCT__entry( __field(int, order) __field(unsigned long, nr_requested) __field(unsigned long, nr_scanned) __field(unsigned long, nr_taken) - __field(unsigned long, nr_lumpy_taken) - __field(unsigned long, nr_lumpy_dirty) - __field(unsigned long, nr_lumpy_failed) __field(isolate_mode_t, isolate_mode) __field(int, file) ), @@ -288,22 +282,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->nr_requested = nr_requested; __entry->nr_scanned = nr_scanned; __entry->nr_taken = nr_taken; - __entry->nr_lumpy_taken = nr_lumpy_taken; - __entry->nr_lumpy_dirty = nr_lumpy_dirty; - __entry->nr_lumpy_failed = nr_lumpy_failed; __entry->isolate_mode = isolate_mode; __entry->file = file; ), - TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d", + TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d", __entry->isolate_mode, __entry->order, __entry->nr_requested, __entry->nr_scanned, __entry->nr_taken, - __entry->nr_lumpy_taken, - __entry->nr_lumpy_dirty, - __entry->nr_lumpy_failed, __entry->file) ); @@ -313,13 +301,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_taken, - unsigned long nr_lumpy_taken, - unsigned long nr_lumpy_dirty, - unsigned long nr_lumpy_failed, isolate_mode_t isolate_mode, int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file) ); @@ -329,13 +314,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_taken, - unsigned long nr_lumpy_taken, - unsigned long nr_lumpy_dirty, - unsigned long nr_lumpy_failed, isolate_mode_t isolate_mode, int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file) ); @@ -395,88 +377,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, show_reclaim_flags(__entry->reclaim_flags)) ); -TRACE_EVENT(replace_swap_token, - TP_PROTO(struct mm_struct *old_mm, - struct mm_struct *new_mm), - - TP_ARGS(old_mm, new_mm), - - TP_STRUCT__entry( - __field(struct mm_struct*, old_mm) - __field(unsigned int, old_prio) - __field(struct mm_struct*, new_mm) - __field(unsigned int, new_prio) - ), - - TP_fast_assign( - __entry->old_mm = old_mm; - __entry->old_prio = old_mm ? old_mm->token_priority : 0; - __entry->new_mm = new_mm; - __entry->new_prio = new_mm->token_priority; - ), - - TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u", - __entry->old_mm, __entry->old_prio, - __entry->new_mm, __entry->new_prio) -); - -DECLARE_EVENT_CLASS(put_swap_token_template, - TP_PROTO(struct mm_struct *swap_token_mm), - - TP_ARGS(swap_token_mm), - - TP_STRUCT__entry( - __field(struct mm_struct*, swap_token_mm) - ), - - TP_fast_assign( - __entry->swap_token_mm = swap_token_mm; - ), - - TP_printk("token_mm=%p", __entry->swap_token_mm) -); - -DEFINE_EVENT(put_swap_token_template, put_swap_token, - TP_PROTO(struct mm_struct *swap_token_mm), - TP_ARGS(swap_token_mm) -); - -DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, - TP_PROTO(struct mm_struct *swap_token_mm), - TP_ARGS(swap_token_mm), - TP_CONDITION(swap_token_mm != NULL) -); - -TRACE_EVENT_CONDITION(update_swap_token_priority, - TP_PROTO(struct mm_struct *mm, - unsigned int old_prio, - struct mm_struct *swap_token_mm), - - TP_ARGS(mm, old_prio, swap_token_mm), - - TP_CONDITION(mm->token_priority != old_prio), - - TP_STRUCT__entry( - __field(struct mm_struct*, mm) - __field(unsigned int, old_prio) - __field(unsigned int, new_prio) - __field(struct mm_struct*, swap_token_mm) - __field(unsigned int, swap_token_prio) - ), - - TP_fast_assign( - __entry->mm = mm; - __entry->old_prio = old_prio; - __entry->new_prio = mm->token_priority; - __entry->swap_token_mm = swap_token_mm; - __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0; - ), - - TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u", - __entry->mm, __entry->old_prio, __entry->new_prio, - __entry->swap_token_mm, __entry->swap_token_prio) -); - #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a0c6af34d500..0f3527d6184a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5132,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth); * @root: the css supporsed to be an ancestor of the child. * * Returns true if "root" is an ancestor of "child" in its hierarchy. Because - * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). + * this function reads css->id, the caller must hold rcu_read_lock(). * But, considering usual usage, the csses should be valid objects after test. * Assuming that the caller will do some action to the child if this returns * returns true, the caller must take "child";s reference count. @@ -5144,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, { struct css_id *child_id; struct css_id *root_id; - bool ret = true; - rcu_read_lock(); child_id = rcu_dereference(child->id); + if (!child_id) + return false; root_id = rcu_dereference(root->id); - if (!child_id - || !root_id - || (child_id->depth < root_id->depth) - || (child_id->stack[root_id->depth] != root_id->id)) - ret = false; - rcu_read_unlock(); - return ret; + if (!root_id) + return false; + if (child_id->depth < root_id->depth) + return false; + if (child_id->stack[root_id->depth] != root_id->id) + return false; + return true; } void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) diff --git a/kernel/fork.c b/kernel/fork.c index 47b4e4f379f9..017fb23d5983 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -386,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) } charge = 0; if (mpnt->vm_flags & VM_ACCOUNT) { - unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + unsigned long len; + len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ goto fail_nomem; charge = len; @@ -614,7 +615,6 @@ void mmput(struct mm_struct *mm) list_del(&mm->mmlist); spin_unlock(&mmlist_lock); } - put_swap_token(mm); if (mm->binfmt) module_put(mm->binfmt->module); mmdrop(mm); @@ -831,10 +831,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); mm_init_cpumask(mm); - /* Initializing for Swap token stuff */ - mm->token_priority = 0; - mm->last_interval = 0; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif @@ -913,10 +909,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) goto fail_nomem; good_mm: - /* Initializing for Swap token stuff */ - mm->token_priority = 0; - mm->last_interval = 0; - tsk->mm = mm; tsk->active_mm = mm; return 0; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 079f1d39a8b8..2169feeba529 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, /* Look up a kernel symbol and return it in a text buffer. */ static int __sprint_symbol(char *buffer, unsigned long address, - int symbol_offset) + int symbol_offset, int add_offset) { char *modname; const char *name; @@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address, if (name != buffer) strcpy(buffer, name); len = strlen(buffer); - buffer += len; offset -= symbol_offset; + if (add_offset) + len += sprintf(buffer + len, "+%#lx/%#lx", offset, size); + if (modname) - len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); - else - len += sprintf(buffer, "+%#lx/%#lx", offset, size); + len += sprintf(buffer + len, " [%s]", modname); return len; } @@ -382,12 +382,28 @@ static int __sprint_symbol(char *buffer, unsigned long address, */ int sprint_symbol(char *buffer, unsigned long address) { - return __sprint_symbol(buffer, address, 0); + return __sprint_symbol(buffer, address, 0, 1); } - EXPORT_SYMBOL_GPL(sprint_symbol); /** + * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer + * @buffer: buffer to be stored + * @address: address to lookup + * + * This function looks up a kernel symbol with @address and stores its name + * and module name to @buffer if possible. If no symbol was found, just saves + * its @address as is. + * + * This function returns the number of bytes stored in @buffer. + */ +int sprint_symbol_no_offset(char *buffer, unsigned long address) +{ + return __sprint_symbol(buffer, address, 0, 0); +} +EXPORT_SYMBOL_GPL(sprint_symbol_no_offset); + +/** * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer * @buffer: buffer to be stored * @address: address to lookup @@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol); */ int sprint_backtrace(char *buffer, unsigned long address) { - return __sprint_symbol(buffer, address, -1); + return __sprint_symbol(buffer, address, -1, 1); } /* Look up a kernel symbol and print it to the kernel messages. */ diff --git a/kernel/res_counter.c b/kernel/res_counter.c index bebe2b170d49..ad581aa2369a 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -94,13 +94,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) counter->usage -= val; } -void res_counter_uncharge(struct res_counter *counter, unsigned long val) +void res_counter_uncharge_until(struct res_counter *counter, + struct res_counter *top, + unsigned long val) { unsigned long flags; struct res_counter *c; local_irq_save(flags); - for (c = counter; c != NULL; c = c->parent) { + for (c = counter; c != top; c = c->parent) { spin_lock(&c->lock); res_counter_uncharge_locked(c, val); spin_unlock(&c->lock); @@ -108,6 +110,10 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) local_irq_restore(flags); } +void res_counter_uncharge(struct res_counter *counter, unsigned long val) +{ + res_counter_uncharge_until(counter, NULL, val); +} static inline unsigned long long * res_counter_member(struct res_counter *counter, int member) diff --git a/lib/bitmap.c b/lib/bitmap.c index b5a8b6ad2454..06fdfa1aeba7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -369,7 +369,8 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area); * @nmaskbits: size of bitmap, in bits * * Exactly @nmaskbits bits are displayed. Hex digits are grouped into - * comma-separated sets of eight digits per set. + * comma-separated sets of eight digits per set. Returns the number of + * characters which were written to *buf, excluding the trailing \0. */ int bitmap_scnprintf(char *buf, unsigned int buflen, const unsigned long *maskp, int nmaskbits) @@ -517,8 +518,8 @@ EXPORT_SYMBOL(bitmap_parse_user); * * Helper routine for bitmap_scnlistprintf(). Write decimal number * or range to buf, suppressing output past buf+buflen, with optional - * comma-prefix. Return len of what would be written to buf, if it - * all fit. + * comma-prefix. Return len of what was written to *buf, excluding the + * trailing \0. */ static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) { @@ -544,9 +545,8 @@ static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) * the range. Output format is compatible with the format * accepted as input by bitmap_parselist(). * - * The return value is the number of characters which would be - * generated for the given input, excluding the trailing '\0', as - * per ISO C99. + * The return value is the number of characters which were written to *buf + * excluding the trailing '\0', as per ISO C99's scnprintf. */ int bitmap_scnlistprintf(char *buf, unsigned int buflen, const unsigned long *maskp, int nmaskbits) diff --git a/lib/list_debug.c b/lib/list_debug.c index 3810b481f940..23a5e031cd8b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -31,6 +31,9 @@ void __list_add(struct list_head *new, "list_add corruption. prev->next should be " "next (%p), but was %p. (prev=%p).\n", next, prev->next, prev); + WARN(new == prev || new == next, + "list_add double add: new=%p, prev=%p, next=%p.\n", + new, prev, next); next->prev = new; new->next = next; new->prev = prev; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 86516f5588e3..d7c878cc006c 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -73,11 +73,24 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; static struct kmem_cache *radix_tree_node_cachep; /* + * The radix tree is variable-height, so an insert operation not only has + * to build the branch to its corresponding item, it also has to build the + * branch to existing items if the size has to be increased (by + * radix_tree_extend). + * + * The worst case is a zero height tree with just a single item at index 0, + * and then inserting an item at index ULONG_MAX. This requires 2 new branches + * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. + * Hence: + */ +#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) + +/* * Per-cpu pool of preloaded nodes */ struct radix_tree_preload { int nr; - struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; + struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE]; }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 525d160d44f0..d0ec4f3d1593 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -58,7 +58,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", msg, raw_smp_processor_id(), current->comm, task_pid_nr(current)); - printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " + printk(KERN_EMERG " lock: %ps, .magic: %08x, .owner: %s/%d, " ".owner_cpu: %d\n", lock, lock->magic, owner ? owner->comm : "<none>", diff --git a/lib/string_helpers.c b/lib/string_helpers.c index dd4ece372699..1cffc223bff5 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -23,15 +23,15 @@ int string_get_size(u64 size, const enum string_size_units units, char *buf, int len) { - const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", + static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL}; - const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", + static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", NULL }; - const char **units_str[] = { + static const char **units_str[] = { [STRING_UNITS_10] = units_10, [STRING_UNITS_2] = units_2, }; - const unsigned int divisor[] = { + static const unsigned int divisor[] = { [STRING_UNITS_10] = 1000, [STRING_UNITS_2] = 1024, }; diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 414f46ed1dcd..45bc1f83a5ad 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -130,11 +130,9 @@ void swiotlb_print_info(void) pstart = virt_to_phys(io_tlb_start); pend = virt_to_phys(io_tlb_end); - printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n", - bytes >> 20, io_tlb_start, io_tlb_end); - printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n", - (unsigned long long)pstart, - (unsigned long long)pend); + printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + (unsigned long long)pstart, (unsigned long long)pend - 1, + bytes >> 20, io_tlb_start, io_tlb_end - 1); } void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index d55769d63cb8..bea3f3fa3f02 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c @@ -11,7 +11,7 @@ struct test_fail { }; #define DEFINE_TEST_FAIL(test) \ - const struct test_fail test[] __initdata + const struct test_fail test[] __initconst #define DECLARE_TEST_OK(type, test_type) \ test_type { \ @@ -21,7 +21,7 @@ struct test_fail { } #define DEFINE_TEST_OK(type, test) \ - const type test[] __initdata + const type test[] __initconst #define TEST_FAIL(fn, type, fmt, test) \ { \ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index abbabec9720a..5391299c1e78 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -284,6 +284,7 @@ char *number(char *buf, char *end, unsigned long long num, char locase; int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); int i; + bool is_zero = num == 0LL; /* locase = 0 or 0x20. ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ @@ -305,8 +306,9 @@ char *number(char *buf, char *end, unsigned long long num, } } if (need_pfx) { - spec.field_width--; if (spec.base == 16) + spec.field_width -= 2; + else if (!is_zero) spec.field_width--; } @@ -353,9 +355,11 @@ char *number(char *buf, char *end, unsigned long long num, } /* "0x" / "0" prefix */ if (need_pfx) { - if (buf < end) - *buf = '0'; - ++buf; + if (spec.base == 16 || !is_zero) { + if (buf < end) + *buf = '0'; + ++buf; + } if (spec.base == 16) { if (buf < end) *buf = ('X' | locase); @@ -436,7 +440,7 @@ char *symbol_string(char *buf, char *end, void *ptr, else if (ext != 'f' && ext != 's') sprint_symbol(sym, value); else - kallsyms_lookup(value, NULL, NULL, NULL, sym); + sprint_symbol_no_offset(sym, value); return string(buf, end, sym, spec); #else diff --git a/mm/Kconfig b/mm/Kconfig index 39220026c797..b2176374b98e 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -349,6 +349,16 @@ choice benefit. endchoice +config CROSS_MEMORY_ATTACH + bool "Cross Memory Support" + depends on MMU + default y + help + Enabling this option adds the system calls process_vm_readv and + process_vm_writev which allow a process with the correct privileges + to directly read from or write to to another process's address space. + See the man page for more details. + # # UP and nommu archs use km based percpu allocator # diff --git a/mm/Makefile b/mm/Makefile index 8aada89efbbb..a156285ce88d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -5,8 +5,11 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ - vmalloc.o pagewalk.o pgtable-generic.o \ - process_vm_access.o + vmalloc.o pagewalk.o pgtable-generic.o + +ifdef CONFIG_CROSS_MEMORY_ATTACH +mmu-$(CONFIG_MMU) += process_vm_access.o +endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ @@ -25,7 +28,7 @@ endif obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o obj-$(CONFIG_BOUNCE) += bounce.o -obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o +obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o diff --git a/mm/bootmem.c b/mm/bootmem.c index 0131170c9d54..ec4fcb7a56c8 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -77,16 +77,16 @@ unsigned long __init bootmem_bootmap_pages(unsigned long pages) */ static void __init link_bootmem(bootmem_data_t *bdata) { - struct list_head *iter; + bootmem_data_t *ent; - list_for_each(iter, &bdata_list) { - bootmem_data_t *ent; - - ent = list_entry(iter, bootmem_data_t, list); - if (bdata->node_min_pfn < ent->node_min_pfn) - break; + list_for_each_entry(ent, &bdata_list, list) { + if (bdata->node_min_pfn < ent->node_min_pfn) { + list_add_tail(&bdata->list, &ent->list); + return; + } } - list_add_tail(&bdata->list, iter); + + list_add_tail(&bdata->list, &bdata_list); } /* @@ -203,7 +203,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) } else { unsigned long off = 0; - while (vec && off < BITS_PER_LONG) { + vec >>= start & (BITS_PER_LONG - 1); + while (vec) { if (vec & 1) { page = pfn_to_page(start + off); __free_pages_bootmem(page, 0); @@ -467,7 +468,7 @@ static unsigned long __init align_off(struct bootmem_data *bdata, return ALIGN(base + off, align) - base; } -static void * __init alloc_bootmem_core(struct bootmem_data *bdata, +static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { @@ -588,14 +589,14 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); if (p_bdata) - return alloc_bootmem_core(p_bdata, size, align, + return alloc_bootmem_bdata(p_bdata, size, align, goal, limit); } #endif return NULL; } -static void * __init ___alloc_bootmem_nopanic(unsigned long size, +static void * __init alloc_bootmem_core(unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) @@ -603,7 +604,6 @@ static void * __init ___alloc_bootmem_nopanic(unsigned long size, bootmem_data_t *bdata; void *region; -restart: region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); if (region) return region; @@ -614,11 +614,25 @@ restart: if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) break; - region = alloc_bootmem_core(bdata, size, align, goal, limit); + region = alloc_bootmem_bdata(bdata, size, align, goal, limit); if (region) return region; } + return NULL; +} + +static void * __init ___alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit) +{ + void *ptr; + +restart: + ptr = alloc_bootmem_core(size, align, goal, limit); + if (ptr) + return ptr; if (goal) { goal = 0; goto restart; @@ -684,21 +698,56 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, +static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { void *ptr; - ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); +again: + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, + align, goal, limit); if (ptr) return ptr; - ptr = alloc_bootmem_core(bdata, size, align, goal, limit); + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); if (ptr) return ptr; - return ___alloc_bootmem(size, align, goal, limit); + ptr = alloc_bootmem_core(size, align, goal, limit); + if (ptr) + return ptr; + + if (goal) { + goal = 0; + goto again; + } + + return NULL; +} + +void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) +{ + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + + return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); +} + +void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal, + unsigned long limit) +{ + void *ptr; + + ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); + if (ptr) + return ptr; + + printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); + panic("Out of memory"); + return NULL; } /** @@ -722,7 +771,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); + return ___alloc_bootmem_node(pgdat, size, align, goal, 0); } void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, @@ -743,7 +792,7 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, unsigned long new_goal; new_goal = MAX_DMA32_PFN << PAGE_SHIFT; - ptr = alloc_bootmem_core(pgdat->bdata, size, align, + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, new_goal, 0); if (ptr) return ptr; @@ -754,47 +803,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, } -#ifdef CONFIG_SPARSEMEM -/** - * alloc_bootmem_section - allocate boot memory from a specific section - * @size: size of the request in bytes - * @section_nr: sparse map section to allocate from - * - * Return NULL on failure. - */ -void * __init alloc_bootmem_section(unsigned long size, - unsigned long section_nr) -{ - bootmem_data_t *bdata; - unsigned long pfn, goal; - - pfn = section_nr_to_pfn(section_nr); - goal = pfn << PAGE_SHIFT; - bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; - - return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0); -} -#endif - -void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - void *ptr; - - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); - if (ptr) - return ptr; - - ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); - if (ptr) - return ptr; - - return __alloc_bootmem_nopanic(size, align, goal); -} - #ifndef ARCH_LOW_ADDRESS_LIMIT #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL #endif @@ -839,6 +847,6 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - return ___alloc_bootmem_node(pgdat->bdata, size, align, - goal, ARCH_LOW_ADDRESS_LIMIT); + return ___alloc_bootmem_node(pgdat, size, align, + goal, ARCH_LOW_ADDRESS_LIMIT); } diff --git a/mm/compaction.c b/mm/compaction.c index da7d35ea5103..4ac338af5120 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -226,7 +226,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; - isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; + isolate_mode_t mode = 0; + struct lruvec *lruvec; /* * Ensure that there are not too many pages isolated from the LRU @@ -235,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ - if (!cc->sync) + if (cc->mode != COMPACT_SYNC) return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -303,7 +304,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, * satisfies the allocation */ pageblock_nr = low_pfn >> pageblock_order; - if (!cc->sync && last_pageblock_nr != pageblock_nr && + if (cc->mode != COMPACT_SYNC && + last_pageblock_nr != pageblock_nr && !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; @@ -324,17 +326,19 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (!cc->sync) + if (cc->mode != COMPACT_SYNC) mode |= ISOLATE_ASYNC_MIGRATE; + lruvec = mem_cgroup_page_lruvec(page, zone); + /* Try isolate the page */ - if (__isolate_lru_page(page, mode, 0) != 0) + if (__isolate_lru_page(page, mode) != 0) continue; VM_BUG_ON(PageTransCompound(page)); /* Successfully isolated */ - del_page_from_lru_list(zone, page, page_lru(page)); + del_page_from_lru_list(page, lruvec, page_lru(page)); list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; @@ -357,27 +361,90 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, #endif /* CONFIG_COMPACTION || CONFIG_CMA */ #ifdef CONFIG_COMPACTION +/* + * Returns true if MIGRATE_UNMOVABLE pageblock was successfully + * converted to MIGRATE_MOVABLE type, false otherwise. + */ +static bool rescue_unmovable_pageblock(struct page *page) +{ + unsigned long pfn, start_pfn, end_pfn; + struct page *start_page, *end_page; + + pfn = page_to_pfn(page); + start_pfn = pfn & ~(pageblock_nr_pages - 1); + end_pfn = start_pfn + pageblock_nr_pages; + + start_page = pfn_to_page(start_pfn); + end_page = pfn_to_page(end_pfn); + + /* Do not deal with pageblocks that overlap zones */ + if (page_zone(start_page) != page_zone(end_page)) + return false; + + for (page = start_page, pfn = start_pfn; page < end_page; pfn++, + page++) { + if (!pfn_valid_within(pfn)) + continue; + + if (PageBuddy(page)) { + int order = page_order(page); + + pfn += (1 << order) - 1; + page += (1 << order) - 1; + + continue; + } else if (page_count(page) == 0 || PageLRU(page)) + continue; + + return false; + } + + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE); + return true; +} + +enum smt_result { + GOOD_AS_MIGRATION_TARGET, + FAIL_UNMOVABLE_TARGET, + FAIL_BAD_TARGET, +}; -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) +/* + * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block + * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page + * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise. + */ +static enum smt_result suitable_migration_target(struct page *page, + struct compact_control *cc) { int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return false; + return FAIL_BAD_TARGET; /* If the page is a large free page, then allow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; + return GOOD_AS_MIGRATION_TARGET; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(migratetype)) - return true; + if (cc->mode != COMPACT_ASYNC_UNMOVABLE && + migrate_async_suitable(migratetype)) + return GOOD_AS_MIGRATION_TARGET; + + if (cc->mode == COMPACT_ASYNC_MOVABLE && + migratetype == MIGRATE_UNMOVABLE) + return FAIL_UNMOVABLE_TARGET; + + if (cc->mode != COMPACT_ASYNC_MOVABLE && + migratetype == MIGRATE_UNMOVABLE && + rescue_unmovable_pageblock(page)) + return GOOD_AS_MIGRATION_TARGET; /* Otherwise skip the block */ - return false; + return FAIL_BAD_TARGET; } /* @@ -411,6 +478,13 @@ static void isolate_freepages(struct zone *zone, zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; /* + * isolate_freepages() may be called more than once during + * compact_zone_order() run and we want only the most recent + * count. + */ + cc->nr_pageblocks_skipped = 0; + + /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. We stop searching if the migrate * and free page scanners meet or enough free pages are isolated. @@ -418,6 +492,7 @@ static void isolate_freepages(struct zone *zone, for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { unsigned long isolated; + enum smt_result ret; if (!pfn_valid(pfn)) continue; @@ -434,9 +509,12 @@ static void isolate_freepages(struct zone *zone, continue; /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) + ret = suitable_migration_target(page, cc); + if (ret != GOOD_AS_MIGRATION_TARGET) { + if (ret == FAIL_UNMOVABLE_TARGET) + cc->nr_pageblocks_skipped++; continue; - + } /* * Found a block suitable for isolating free pages from. Now * we disabled interrupts, double check things are ok and @@ -445,12 +523,14 @@ static void isolate_freepages(struct zone *zone, */ isolated = 0; spin_lock_irqsave(&zone->lock, flags); - if (suitable_migration_target(page)) { + ret = suitable_migration_target(page, cc); + if (ret == GOOD_AS_MIGRATION_TARGET) { end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); isolated = isolate_freepages_block(pfn, end_pfn, freelist, false); nr_freepages += isolated; - } + } else if (ret == FAIL_UNMOVABLE_TARGET) + cc->nr_pageblocks_skipped++; spin_unlock_irqrestore(&zone->lock, flags); /* @@ -682,8 +762,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)cc, false, - cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); + (unsigned long)&cc->freepages, false, + (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT + : MIGRATE_ASYNC); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; @@ -712,7 +793,8 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync) + enum compact_mode mode, + unsigned long *nr_pageblocks_skipped) { struct compact_control cc = { .nr_freepages = 0, @@ -720,12 +802,17 @@ static unsigned long compact_zone_order(struct zone *zone, .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, - .sync = sync, + .mode = mode, }; + unsigned long rc; + INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); - return compact_zone(zone, &cc); + rc = compact_zone(zone, &cc); + *nr_pageblocks_skipped = cc.nr_pageblocks_skipped; + + return rc; } int sysctl_extfrag_threshold = 500; @@ -750,6 +837,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; int rc = COMPACT_SKIPPED; + unsigned long nr_pageblocks_skipped; + enum compact_mode mode; /* * Check whether it is worth even starting compaction. The order check is @@ -766,12 +855,22 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - status = compact_zone_order(zone, order, gfp_mask, sync); + mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE; +retry: + status = compact_zone_order(zone, order, gfp_mask, mode, + &nr_pageblocks_skipped); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) break; + + if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) { + if (nr_pageblocks_skipped) { + mode = COMPACT_ASYNC_UNMOVABLE; + goto retry; + } + } } return rc; @@ -805,7 +904,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) if (ok && cc->order > zone->compact_order_failed) zone->compact_order_failed = cc->order + 1; /* Currently async compaction is never deferred. */ - else if (!ok && cc->sync) + else if (!ok && cc->mode == COMPACT_SYNC) defer_compaction(zone, cc->order); } @@ -820,7 +919,7 @@ int compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, - .sync = false, + .mode = COMPACT_ASYNC_MOVABLE, }; return __compact_pgdat(pgdat, &cc); @@ -830,7 +929,7 @@ static int compact_node(int nid) { struct compact_control cc = { .order = -1, - .sync = true, + .mode = COMPACT_SYNC, }; return __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/filemap.c b/mm/filemap.c index 79c4b2b0b14e..64b48f934b89 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -29,7 +29,6 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/security.h> -#include <linux/syscalls.h> #include <linux/cpuset.h> #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> @@ -1478,44 +1477,6 @@ out: } EXPORT_SYMBOL(generic_file_aio_read); -static ssize_t -do_readahead(struct address_space *mapping, struct file *filp, - pgoff_t index, unsigned long nr) -{ - if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) - return -EINVAL; - - force_page_cache_readahead(mapping, filp, index, nr); - return 0; -} - -SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) -{ - ssize_t ret; - struct file *file; - - ret = -EBADF; - file = fget(fd); - if (file) { - if (file->f_mode & FMODE_READ) { - struct address_space *mapping = file->f_mapping; - pgoff_t start = offset >> PAGE_CACHE_SHIFT; - pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; - unsigned long len = end - start + 1; - ret = do_readahead(mapping, file, start, len); - } - fput(file); - } - return ret; -} -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_readahead(long fd, loff_t offset, long count) -{ - return SYSC_readahead((int) fd, offset, (size_t) count); -} -SYSCALL_ALIAS(sys_readahead, SyS_readahead); -#endif - #ifdef CONFIG_MMU /** * page_cache_read - adds requested page to the page cache if not already there diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f0e5306eeb55..57c4b9309015 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -636,16 +636,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, unsigned long haddr, pmd_t *pmd, struct page *page) { - int ret = 0; pgtable_t pgtable; VM_BUG_ON(!PageCompound(page)); pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) { - mem_cgroup_uncharge_page(page); - put_page(page); + if (unlikely(!pgtable)) return VM_FAULT_OOM; - } clear_huge_page(page, haddr, HPAGE_PMD_NR); __SetPageUptodate(page); @@ -675,7 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, spin_unlock(&mm->page_table_lock); } - return ret; + return 0; } static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) @@ -724,8 +720,14 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, put_page(page); goto out; } + if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, + page))) { + mem_cgroup_uncharge_page(page); + put_page(page); + goto out; + } - return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page); + return 0; } out: /* @@ -950,6 +952,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); + if (ret & VM_FAULT_OOM) + split_huge_page(page); put_page(page); goto out; } @@ -957,6 +961,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); + split_huge_page(page); put_page(page); ret |= VM_FAULT_OOM; goto out; @@ -968,8 +973,10 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_lock(&mm->page_table_lock); put_page(page); if (unlikely(!pmd_same(*pmd, orig_pmd))) { + spin_unlock(&mm->page_table_lock); mem_cgroup_uncharge_page(new_page); put_page(new_page); + goto out; } else { pmd_t entry; VM_BUG_ON(!PageHead(page)); @@ -1224,10 +1231,13 @@ static void __split_huge_page_refcount(struct page *page) { int i; struct zone *zone = page_zone(page); + struct lruvec *lruvec; int tail_count = 0; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); + compound_lock(page); /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(page); @@ -1302,13 +1312,12 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); - - lru_add_page_tail(zone, page, page_tail); + lru_add_page_tail(page, page_tail, lruvec); } atomic_sub(tail_count, &page->_count); BUG_ON(atomic_read(&page->_count) <= 0); - __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); ClearPageCompound(page); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4e28416c47fb..285a81e87ec8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -273,8 +273,8 @@ static long region_count(struct list_head *head, long f, long t) /* Locate each segment we overlap with, and count that overlap. */ list_for_each_entry(rg, head, link) { - int seg_from; - int seg_to; + long seg_from; + long seg_to; if (rg->to <= f) continue; @@ -2157,6 +2157,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) kref_get(&reservations->refs); } +static void resv_map_put(struct vm_area_struct *vma) +{ + struct resv_map *reservations = vma_resv_map(vma); + + if (!reservations) + return; + kref_put(&reservations->refs, resv_map_release); +} + static void hugetlb_vm_op_close(struct vm_area_struct *vma) { struct hstate *h = hstate_vma(vma); @@ -2173,7 +2182,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) reserve = (end - start) - region_count(&reservations->regions, start, end); - kref_put(&reservations->refs, resv_map_release); + resv_map_put(vma); if (reserve) { hugetlb_acct_memory(h, -reserve); @@ -2991,12 +3000,16 @@ int hugetlb_reserve_pages(struct inode *inode, set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } - if (chg < 0) - return chg; + if (chg < 0) { + ret = chg; + goto out_err; + } /* There must be enough pages in the subpool for the mapping */ - if (hugepage_subpool_get_pages(spool, chg)) - return -ENOSPC; + if (hugepage_subpool_get_pages(spool, chg)) { + ret = -ENOSPC; + goto out_err; + } /* * Check enough hugepages are available for the reservation. @@ -3005,7 +3018,7 @@ int hugetlb_reserve_pages(struct inode *inode, ret = hugetlb_acct_memory(h, chg); if (ret < 0) { hugepage_subpool_put_pages(spool, chg); - return ret; + goto out_err; } /* @@ -3022,6 +3035,9 @@ int hugetlb_reserve_pages(struct inode *inode, if (!vma || vma->vm_flags & VM_MAYSHARE) region_add(&inode->i_mapping->private_list, from, to); return 0; +out_err: + resv_map_put(vma); + return ret; } void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) diff --git a/mm/internal.h b/mm/internal.h index aee4761cf9a9..4194ab9dc19b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -94,6 +94,9 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ +extern void set_pageblock_migratetype(struct page *page, int migratetype); +extern int move_freepages_block(struct zone *zone, struct page *page, + int migratetype); extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); #ifdef CONFIG_MEMORY_FAILURE @@ -101,6 +104,7 @@ extern bool is_free_buddy_page(struct page *page); #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA +#include <linux/compaction.h> /* * in mm/compaction.c @@ -119,11 +123,14 @@ struct compact_control { unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ + enum compact_mode mode; /* Compaction mode */ int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; + + /* Number of UNMOVABLE destination pageblocks skipped during scan */ + unsigned long nr_pageblocks_skipped; }; unsigned long @@ -164,7 +171,8 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) * to determine if it's being mapped into a LOCKED vma. * If so, mark page as mlocked. */ -static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) +static inline int mlocked_vma_newpage(struct vm_area_struct *vma, + struct page *page) { VM_BUG_ON(PageLRU(page)); @@ -222,7 +230,7 @@ extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma); #endif #else /* !CONFIG_MMU */ -static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) +static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p) { return 0; } diff --git a/mm/madvise.c b/mm/madvise.c index 1ccbba5b6674..deff1b64a08c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -11,8 +11,10 @@ #include <linux/mempolicy.h> #include <linux/page-isolation.h> #include <linux/hugetlb.h> +#include <linux/falloc.h> #include <linux/sched.h> #include <linux/ksm.h> +#include <linux/fs.h> /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -200,8 +202,7 @@ static long madvise_remove(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { - struct address_space *mapping; - loff_t offset, endoff; + loff_t offset; int error; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ @@ -217,16 +218,14 @@ static long madvise_remove(struct vm_area_struct *vma, if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) return -EACCES; - mapping = vma->vm_file->f_mapping; - offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - endoff = (loff_t)(end - vma->vm_start - 1) - + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - /* vmtruncate_range needs to take i_mutex */ + /* filesystem's fallocate may need to take i_mutex */ up_read(¤t->mm->mmap_sem); - error = vmtruncate_range(mapping->host, offset, endoff); + error = do_fallocate(vma->vm_file, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, end - start); down_read(¤t->mm->mmap_sem); return error; } diff --git a/mm/memblock.c b/mm/memblock.c index a44eab3157f8..952123eba433 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -37,6 +37,8 @@ struct memblock memblock __initdata_memblock = { int memblock_debug __initdata_memblock; static int memblock_can_resize __initdata_memblock; +static int memblock_memory_in_slab __initdata_memblock = 0; +static int memblock_reserved_in_slab __initdata_memblock = 0; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -187,6 +189,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) struct memblock_region *new_array, *old_array; phys_addr_t old_size, new_size, addr; int use_slab = slab_is_available(); + int *in_slab; /* We don't allow resizing until we know about the reserved regions * of memory that aren't suitable for allocation @@ -198,6 +201,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) old_size = type->max * sizeof(struct memblock_region); new_size = old_size << 1; + /* Retrieve the slab flag */ + if (type == &memblock.memory) + in_slab = &memblock_memory_in_slab; + else + in_slab = &memblock_reserved_in_slab; + /* Try to find some space for it. * * WARNING: We assume that either slab_is_available() and we use it or @@ -212,14 +221,15 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) if (use_slab) { new_array = kmalloc(new_size, GFP_KERNEL); addr = new_array ? __pa(new_array) : 0; - } else + } else { addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t)); + new_array = addr ? __va(addr) : 0; + } if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", memblock_type_name(type), type->max, type->max * 2); return -1; } - new_array = __va(addr); memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]", memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1); @@ -234,22 +244,24 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) type->regions = new_array; type->max <<= 1; - /* If we use SLAB that's it, we are done */ - if (use_slab) - return 0; - - /* Add the new reserved region now. Should not fail ! */ - BUG_ON(memblock_reserve(addr, new_size)); - - /* If the array wasn't our static init one, then free it. We only do - * that before SLAB is available as later on, we don't know whether - * to use kfree or free_bootmem_pages(). Shouldn't be a big deal - * anyways + /* Free old array. We needn't free it if the array is the + * static one */ - if (old_array != memblock_memory_init_regions && - old_array != memblock_reserved_init_regions) + if (*in_slab) + kfree(old_array); + else if (old_array != memblock_memory_init_regions && + old_array != memblock_reserved_init_regions) memblock_free(__pa(old_array), old_size); + /* Reserve the new array if that comes from the memblock. + * Otherwise, we needn't do it + */ + if (!use_slab) + BUG_ON(memblock_reserve(addr, new_size)); + + /* Update slab flag */ + *in_slab = use_slab; + return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f342778a0c0a..ac35bccadb7b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -59,7 +59,7 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 -struct mem_cgroup *root_mem_cgroup __read_mostly; +static struct mem_cgroup *root_mem_cgroup __read_mostly; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ @@ -73,7 +73,7 @@ static int really_do_swap_account __initdata = 0; #endif #else -#define do_swap_account (0) +#define do_swap_account 0 #endif @@ -88,18 +88,31 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ - MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ MEM_CGROUP_STAT_NSTATS, }; +static const char * const mem_cgroup_stat_names[] = { + "cache", + "rss", + "mapped_file", + "swap", +}; + enum mem_cgroup_events_index { MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ - MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */ MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ MEM_CGROUP_EVENTS_NSTATS, }; + +static const char * const mem_cgroup_events_names[] = { + "pgpgin", + "pgpgout", + "pgfault", + "pgmajfault", +}; + /* * Per memcg event counter is incremented at every pagein/pageout. With THP, * it will be incremated by the number of pages. This counter is used for @@ -112,13 +125,14 @@ enum mem_cgroup_events_target { MEM_CGROUP_TARGET_NUMAINFO, MEM_CGROUP_NTARGETS, }; -#define THRESHOLDS_EVENTS_TARGET (128) -#define SOFTLIMIT_EVENTS_TARGET (1024) -#define NUMAINFO_EVENTS_TARGET (1024) +#define THRESHOLDS_EVENTS_TARGET 128 +#define SOFTLIMIT_EVENTS_TARGET 1024 +#define NUMAINFO_EVENTS_TARGET 1024 struct mem_cgroup_stat_cpu { long count[MEM_CGROUP_STAT_NSTATS]; unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; + unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; }; @@ -138,7 +152,6 @@ struct mem_cgroup_per_zone { struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; - struct zone_reclaim_stat reclaim_stat; struct rb_node tree_node; /* RB tree node */ unsigned long long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ @@ -182,7 +195,7 @@ struct mem_cgroup_threshold { /* For threshold */ struct mem_cgroup_threshold_ary { - /* An array index points to threshold just below usage. */ + /* An array index points to threshold just below or equal to usage. */ int current_threshold; /* Size of entries[] */ unsigned int size; @@ -245,8 +258,8 @@ struct mem_cgroup { */ struct rcu_head rcu_freeing; /* - * But when using vfree(), that cannot be done at - * interrupt time, so we must then queue the work. + * We also need some space for a worker in deferred freeing. + * By the time we call it, rcu_freeing is no longer in use. */ struct work_struct work_freeing; }; @@ -305,7 +318,7 @@ struct mem_cgroup { /* * percpu counter. */ - struct mem_cgroup_stat_cpu *stat; + struct mem_cgroup_stat_cpu __percpu *stat; /* * used when a cpu is offlined or other synchronizations * See mem_cgroup_read_stat(). @@ -360,8 +373,8 @@ static bool move_file(void) * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft * limit reclaim to prevent infinite loops, if they ever occur. */ -#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100) -#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2) +#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 +#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, @@ -377,8 +390,8 @@ enum charge_type { #define _MEM (0) #define _MEMSWAP (1) #define _OOM_TYPE (2) -#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) -#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) +#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) +#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) /* Used for OOM nofiier */ #define OOM_CONTROL (0) @@ -404,6 +417,7 @@ void sock_update_memcg(struct sock *sk) { if (mem_cgroup_sockets_enabled) { struct mem_cgroup *memcg; + struct cg_proto *cg_proto; BUG_ON(!sk->sk_prot->proto_cgroup); @@ -423,9 +437,10 @@ void sock_update_memcg(struct sock *sk) rcu_read_lock(); memcg = mem_cgroup_from_task(current); - if (!mem_cgroup_is_root(memcg)) { + cg_proto = sk->sk_prot->proto_cgroup(memcg); + if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) { mem_cgroup_get(memcg); - sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg); + sk->sk_cgrp = cg_proto; } rcu_read_unlock(); } @@ -454,6 +469,19 @@ EXPORT_SYMBOL(tcp_proto_cgroup); #endif /* CONFIG_INET */ #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +static void disarm_sock_keys(struct mem_cgroup *memcg) +{ + if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto)) + return; + static_key_slow_dec(&memcg_socket_limit_enabled); +} +#else +static void disarm_sock_keys(struct mem_cgroup *memcg) +{ +} +#endif + static void drain_all_stock_async(struct mem_cgroup *memcg); static struct mem_cgroup_per_zone * @@ -718,12 +746,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, nr_pages = -nr_pages; /* for event */ } - __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); + __this_cpu_add(memcg->stat->nr_page_events, nr_pages); preempt_enable(); } unsigned long +mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) +{ + struct mem_cgroup_per_zone *mz; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + return mz->lru_size[lru]; +} + +static unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, unsigned int lru_mask) { @@ -770,7 +807,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, { unsigned long val, next; - val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); + val = __this_cpu_read(memcg->stat->nr_page_events); next = __this_cpu_read(memcg->stat->targets[target]); /* from time_after() in jiffies.h */ if ((long)next - (long)val < 0) { @@ -1013,7 +1050,7 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event); /** * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg * @zone: zone of the wanted lruvec - * @mem: memcg of the wanted lruvec + * @memcg: memcg of the wanted lruvec * * Returns the lru list vector holding pages for the given @zone and * @mem. This can be the global zone lruvec, if the memory controller @@ -1046,19 +1083,11 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, */ /** - * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec - * @zone: zone of the page + * mem_cgroup_page_lruvec - return lruvec for adding an lru page * @page: the page - * @lru: current lru - * - * This function accounts for @page being added to @lru, and returns - * the lruvec for the given @zone and the memcg @page is charged to. - * - * The callsite is then responsible for physically linking the page to - * the returned lruvec->lists[@lru]. + * @zone: zone of the page */ -struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, - enum lru_list lru) +struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) { struct mem_cgroup_per_zone *mz; struct mem_cgroup *memcg; @@ -1071,7 +1100,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, memcg = pc->mem_cgroup; /* - * Surreptitiously switch any uncharged page to root: + * Surreptitiously switch any uncharged offlist page to root: * an uncharged page off lru does nothing to secure * its former mem_cgroup from sudden removal. * @@ -1079,85 +1108,60 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, * under page_cgroup lock: between them, they make all uses * of pc->mem_cgroup safe. */ - if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup) + if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup) pc->mem_cgroup = memcg = root_mem_cgroup; mz = page_cgroup_zoneinfo(memcg, page); - /* compound_order() is stabilized through lru_lock */ - mz->lru_size[lru] += 1 << compound_order(page); return &mz->lruvec; } /** - * mem_cgroup_lru_del_list - account for removing an lru page - * @page: the page - * @lru: target lru - * - * This function accounts for @page being removed from @lru. + * mem_cgroup_update_lru_size - account for adding or removing an lru page + * @lruvec: mem_cgroup per zone lru vector + * @lru: index of lru list the page is sitting on + * @nr_pages: positive when adding or negative when removing * - * The callsite is then responsible for physically unlinking - * @page->lru. + * This function must be called when a page is added to or removed from an + * lru list. */ -void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) +void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, + int nr_pages) { struct mem_cgroup_per_zone *mz; - struct mem_cgroup *memcg; - struct page_cgroup *pc; + unsigned long *lru_size; if (mem_cgroup_disabled()) return; - pc = lookup_page_cgroup(page); - memcg = pc->mem_cgroup; - VM_BUG_ON(!memcg); - mz = page_cgroup_zoneinfo(memcg, page); - /* huge page split is done under lru_lock. so, we have no races. */ - VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page))); - mz->lru_size[lru] -= 1 << compound_order(page); -} - -void mem_cgroup_lru_del(struct page *page) -{ - mem_cgroup_lru_del_list(page, page_lru(page)); -} - -/** - * mem_cgroup_lru_move_lists - account for moving a page between lrus - * @zone: zone of the page - * @page: the page - * @from: current lru - * @to: target lru - * - * This function accounts for @page being moved between the lrus @from - * and @to, and returns the lruvec for the given @zone and the memcg - * @page is charged to. - * - * The callsite is then responsible for physically relinking - * @page->lru to the returned lruvec->lists[@to]. - */ -struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, - struct page *page, - enum lru_list from, - enum lru_list to) -{ - /* XXX: Optimize this, especially for @from == @to */ - mem_cgroup_lru_del_list(page, from); - return mem_cgroup_lru_add_list(zone, page, to); + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + lru_size = mz->lru_size + lru; + *lru_size += nr_pages; + VM_BUG_ON((long)(*lru_size) < 0); } /* * Checks whether given mem is same or in the root_mem_cgroup's * hierarchy subtree */ +bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, + struct mem_cgroup *memcg) +{ + if (root_memcg == memcg) + return true; + if (!root_memcg->use_hierarchy) + return false; + return css_is_ancestor(&memcg->css, &root_memcg->css); +} + static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, - struct mem_cgroup *memcg) + struct mem_cgroup *memcg) { - if (root_memcg != memcg) { - return (root_memcg->use_hierarchy && - css_is_ancestor(&memcg->css, &root_memcg->css)); - } + bool ret; - return true; + rcu_read_lock(); + ret = __mem_cgroup_same_or_subtree(root_memcg, memcg); + rcu_read_unlock(); + return ret; } int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) @@ -1195,19 +1199,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) return ret; } -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { unsigned long inactive_ratio; - int nid = zone_to_nid(zone); - int zid = zone_idx(zone); unsigned long inactive; unsigned long active; unsigned long gb; - inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_INACTIVE_ANON)); - active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_ACTIVE_ANON)); + inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); + active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -1218,49 +1218,17 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) return inactive * inactive_ratio < active; } -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) +int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) { unsigned long active; unsigned long inactive; - int zid = zone_idx(zone); - int nid = zone_to_nid(zone); - inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_INACTIVE_FILE)); - active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_ACTIVE_FILE)); + inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE); + active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE); return (active > inactive); } -struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, - struct zone *zone) -{ - int nid = zone_to_nid(zone); - int zid = zone_idx(zone); - struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid); - - return &mz->reclaim_stat; -} - -struct zone_reclaim_stat * -mem_cgroup_get_reclaim_stat_from_page(struct page *page) -{ - struct page_cgroup *pc; - struct mem_cgroup_per_zone *mz; - - if (mem_cgroup_disabled()) - return NULL; - - pc = lookup_page_cgroup(page); - if (!PageCgroupUsed(pc)) - return NULL; - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ - smp_rmb(); - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); - return &mz->reclaim_stat; -} - #define mem_cgroup_from_res_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) @@ -1634,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) * unused nodes. But scan_nodes is lazily updated and may not cotain * enough new information. We need to do double check. */ -bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) +static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) { int nid; @@ -1669,7 +1637,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) return 0; } -bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) +static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) { return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); } @@ -1843,7 +1811,8 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) /* * try to call OOM killer. returns false if we should exit memory-reclaim loop. */ -bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order) +static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, + int order) { struct oom_wait_info owait; bool locked, need_to_kill; @@ -1992,7 +1961,7 @@ struct memcg_stock_pcp { unsigned int nr_pages; struct work_struct work; unsigned long flags; -#define FLUSHING_CACHED_CHARGE (0) +#define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); static DEFINE_MUTEX(percpu_charge_mutex); @@ -2139,7 +2108,7 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu) int i; spin_lock(&memcg->pcp_counter_lock); - for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { + for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { long x = per_cpu(memcg->stat->count[i], cpu); per_cpu(memcg->stat->count[i], cpu) = 0; @@ -2427,6 +2396,24 @@ static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, } /* + * Cancel chrages in this cgroup....doesn't propagate to parent cgroup. + * This is useful when moving usage to parent cgroup. + */ +static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, + unsigned int nr_pages) +{ + unsigned long bytes = nr_pages * PAGE_SIZE; + + if (mem_cgroup_is_root(memcg)) + return; + + res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); + if (do_swap_account) + res_counter_uncharge_until(&memcg->memsw, + memcg->memsw.parent, bytes); +} + +/* * A helper function to get mem_cgroup from ID. must be called under * rcu_read_lock(). The caller must check css_is_removed() or some if * it's concern. (dropping refcnt from swap can be called against removed @@ -2481,6 +2468,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, { struct page_cgroup *pc = lookup_page_cgroup(page); struct zone *uninitialized_var(zone); + struct lruvec *lruvec; bool was_on_lru = false; bool anon; @@ -2503,8 +2491,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, zone = page_zone(page); spin_lock_irq(&zone->lru_lock); if (PageLRU(page)) { + lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); ClearPageLRU(page); - del_page_from_lru_list(zone, page, page_lru(page)); + del_page_from_lru_list(page, lruvec, page_lru(page)); was_on_lru = true; } } @@ -2522,9 +2511,10 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, if (lrucare) { if (was_on_lru) { + lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - add_page_to_lru_list(zone, page, page_lru(page)); + add_page_to_lru_list(page, lruvec, page_lru(page)); } spin_unlock_irq(&zone->lru_lock); } @@ -2547,7 +2537,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION)) +#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION) /* * Because tail pages are not marked as "used", set it. We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. @@ -2578,23 +2568,19 @@ void mem_cgroup_split_huge_fixup(struct page *head) * @pc: page_cgroup of the page. * @from: mem_cgroup which the page is moved from. * @to: mem_cgroup which the page is moved to. @from != @to. - * @uncharge: whether we should call uncharge and css_put against @from. * * The caller must confirm following. * - page is not on LRU (isolate_page() is useful.) * - compound_lock is held when nr_pages > 1 * - * This function doesn't do "charge" nor css_get to new cgroup. It should be - * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is - * true, this function does "uncharge" from old cgroup, but it doesn't if - * @uncharge is false, so a caller should do "uncharge". + * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" + * from old cgroup. */ static int mem_cgroup_move_account(struct page *page, unsigned int nr_pages, struct page_cgroup *pc, struct mem_cgroup *from, - struct mem_cgroup *to, - bool uncharge) + struct mem_cgroup *to) { unsigned long flags; int ret; @@ -2628,9 +2614,6 @@ static int mem_cgroup_move_account(struct page *page, preempt_enable(); } mem_cgroup_charge_statistics(from, anon, -nr_pages); - if (uncharge) - /* This is not "cancel", but cancel_charge does all we need. */ - __mem_cgroup_cancel_charge(from, nr_pages); /* caller should have done css_get */ pc->mem_cgroup = to; @@ -2664,15 +2647,13 @@ static int mem_cgroup_move_parent(struct page *page, struct mem_cgroup *child, gfp_t gfp_mask) { - struct cgroup *cg = child->css.cgroup; - struct cgroup *pcg = cg->parent; struct mem_cgroup *parent; unsigned int nr_pages; unsigned long uninitialized_var(flags); int ret; /* Is ROOT ? */ - if (!pcg) + if (mem_cgroup_is_root(child)) return -EINVAL; ret = -EBUSY; @@ -2683,21 +2664,23 @@ static int mem_cgroup_move_parent(struct page *page, nr_pages = hpage_nr_pages(page); - parent = mem_cgroup_from_cont(pcg); - ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false); - if (ret) - goto put_back; + parent = parent_mem_cgroup(child); + /* + * If no parent, move charges to root cgroup. + */ + if (!parent) + parent = root_mem_cgroup; if (nr_pages > 1) flags = compound_lock_irqsave(page); - ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true); - if (ret) - __mem_cgroup_cancel_charge(parent, nr_pages); + ret = mem_cgroup_move_account(page, nr_pages, + pc, child, parent); + if (!ret) + __mem_cgroup_cancel_local_charge(child, nr_pages); if (nr_pages > 1) compound_unlock_irqrestore(page, flags); -put_back: putback_lru_page(page); put: put_page(page); @@ -2845,24 +2828,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, */ if (do_swap_account && PageSwapCache(page)) { swp_entry_t ent = {.val = page_private(page)}; - struct mem_cgroup *swap_memcg; - unsigned short id; - - id = swap_cgroup_record(ent, 0); - rcu_read_lock(); - swap_memcg = mem_cgroup_lookup(id); - if (swap_memcg) { - /* - * This recorded memcg can be obsolete one. So, avoid - * calling css_tryget - */ - if (!mem_cgroup_is_root(swap_memcg)) - res_counter_uncharge(&swap_memcg->memsw, - PAGE_SIZE); - mem_cgroup_swap_statistics(swap_memcg, false); - mem_cgroup_put(swap_memcg); - } - rcu_read_unlock(); + mem_cgroup_uncharge_swap(ent); } /* * At swapin, we may charge account against cgroup which has no tasks. @@ -3155,7 +3121,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) * @entry: swap entry to be moved * @from: mem_cgroup which the entry is moved from * @to: mem_cgroup which the entry is moved to - * @need_fixup: whether we should fixup res_counters and refcounts. * * It succeeds only when the swap_cgroup's record for this entry is the same * as the mem_cgroup's id of @from. @@ -3166,7 +3131,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) * both res and memsw, and called css_get(). */ static int mem_cgroup_move_swap_account(swp_entry_t entry, - struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) + struct mem_cgroup *from, struct mem_cgroup *to) { unsigned short old_id, new_id; @@ -3185,24 +3150,13 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry, * swap-in, the refcount of @to might be decreased to 0. */ mem_cgroup_get(to); - if (need_fixup) { - if (!mem_cgroup_is_root(from)) - res_counter_uncharge(&from->memsw, PAGE_SIZE); - mem_cgroup_put(from); - /* - * we charged both to->res and to->memsw, so we should - * uncharge to->res. - */ - if (!mem_cgroup_is_root(to)) - res_counter_uncharge(&to->res, PAGE_SIZE); - } return 0; } return -EINVAL; } #else static inline int mem_cgroup_move_swap_account(swp_entry_t entry, - struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) + struct mem_cgroup *from, struct mem_cgroup *to) { return -EINVAL; } @@ -3363,7 +3317,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = NULL; struct page_cgroup *pc; enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; @@ -3373,11 +3327,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, pc = lookup_page_cgroup(oldpage); /* fix accounting on old pages */ lock_page_cgroup(pc); - memcg = pc->mem_cgroup; - mem_cgroup_charge_statistics(memcg, false, -1); - ClearPageCgroupUsed(pc); + if (PageCgroupUsed(pc)) { + memcg = pc->mem_cgroup; + mem_cgroup_charge_statistics(memcg, false, -1); + ClearPageCgroupUsed(pc); + } unlock_page_cgroup(pc); + /* + * When called from shmem_replace_page(), in some cases the + * oldpage has already been charged, and in some cases not. + */ + if (!memcg) + return; + if (PageSwapBacked(oldpage)) type = MEM_CGROUP_CHARGE_TYPE_SHMEM; @@ -3793,7 +3756,7 @@ try_to_free: goto move_account; } -int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) +static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) { return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); } @@ -4051,103 +4014,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp, } #endif - -/* For read statistics */ -enum { - MCS_CACHE, - MCS_RSS, - MCS_FILE_MAPPED, - MCS_PGPGIN, - MCS_PGPGOUT, - MCS_SWAP, - MCS_PGFAULT, - MCS_PGMAJFAULT, - MCS_INACTIVE_ANON, - MCS_ACTIVE_ANON, - MCS_INACTIVE_FILE, - MCS_ACTIVE_FILE, - MCS_UNEVICTABLE, - NR_MCS_STAT, -}; - -struct mcs_total_stat { - s64 stat[NR_MCS_STAT]; -}; - -struct { - char *local_name; - char *total_name; -} memcg_stat_strings[NR_MCS_STAT] = { - {"cache", "total_cache"}, - {"rss", "total_rss"}, - {"mapped_file", "total_mapped_file"}, - {"pgpgin", "total_pgpgin"}, - {"pgpgout", "total_pgpgout"}, - {"swap", "total_swap"}, - {"pgfault", "total_pgfault"}, - {"pgmajfault", "total_pgmajfault"}, - {"inactive_anon", "total_inactive_anon"}, - {"active_anon", "total_active_anon"}, - {"inactive_file", "total_inactive_file"}, - {"active_file", "total_active_file"}, - {"unevictable", "total_unevictable"} -}; - - -static void -mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) -{ - s64 val; - - /* per cpu stat */ - val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE); - s->stat[MCS_CACHE] += val * PAGE_SIZE; - val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS); - s->stat[MCS_RSS] += val * PAGE_SIZE; - val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); - s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; - val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN); - s->stat[MCS_PGPGIN] += val; - val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT); - s->stat[MCS_PGPGOUT] += val; - if (do_swap_account) { - val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); - s->stat[MCS_SWAP] += val * PAGE_SIZE; - } - val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT); - s->stat[MCS_PGFAULT] += val; - val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT); - s->stat[MCS_PGMAJFAULT] += val; - - /* per zone stat */ - val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); - s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); - s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); - s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); - s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE)); - s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; -} - -static void -mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) -{ - struct mem_cgroup *iter; - - for_each_mem_cgroup_tree(iter, memcg) - mem_cgroup_get_local_stat(iter, s); -} - #ifdef CONFIG_NUMA -static int mem_control_numa_stat_show(struct seq_file *m, void *arg) +static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft, + struct seq_file *m) { int nid; unsigned long total_nr, file_nr, anon_nr, unevictable_nr; unsigned long node_nr; - struct cgroup *cont = m->private; struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL); @@ -4188,64 +4061,100 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg) } #endif /* CONFIG_NUMA */ +static const char * const mem_cgroup_lru_names[] = { + "inactive_anon", + "active_anon", + "inactive_file", + "active_file", + "unevictable", +}; + +static inline void mem_cgroup_lru_names_not_uptodate(void) +{ + BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); +} + static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, - struct cgroup_map_cb *cb) + struct seq_file *m) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - struct mcs_total_stat mystat; - int i; - - memset(&mystat, 0, sizeof(mystat)); - mem_cgroup_get_local_stat(memcg, &mystat); + struct mem_cgroup *mi; + unsigned int i; - - for (i = 0; i < NR_MCS_STAT; i++) { - if (i == MCS_SWAP && !do_swap_account) + for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { + if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account) continue; - cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); + seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i], + mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); } + for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) + seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i], + mem_cgroup_read_events(memcg, i)); + + for (i = 0; i < NR_LRU_LISTS; i++) + seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], + mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE); + /* Hierarchical information */ { unsigned long long limit, memsw_limit; memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit); - cb->fill(cb, "hierarchical_memory_limit", limit); + seq_printf(m, "hierarchical_memory_limit %llu\n", limit); if (do_swap_account) - cb->fill(cb, "hierarchical_memsw_limit", memsw_limit); + seq_printf(m, "hierarchical_memsw_limit %llu\n", + memsw_limit); } - memset(&mystat, 0, sizeof(mystat)); - mem_cgroup_get_total_stat(memcg, &mystat); - for (i = 0; i < NR_MCS_STAT; i++) { - if (i == MCS_SWAP && !do_swap_account) + for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { + long long val = 0; + + if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account) continue; - cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); + for_each_mem_cgroup_tree(mi, memcg) + val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; + seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val); + } + + for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { + unsigned long long val = 0; + + for_each_mem_cgroup_tree(mi, memcg) + val += mem_cgroup_read_events(mi, i); + seq_printf(m, "total_%s %llu\n", + mem_cgroup_events_names[i], val); + } + + for (i = 0; i < NR_LRU_LISTS; i++) { + unsigned long long val = 0; + + for_each_mem_cgroup_tree(mi, memcg) + val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE; + seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val); } #ifdef CONFIG_DEBUG_VM { int nid, zid; struct mem_cgroup_per_zone *mz; + struct zone_reclaim_stat *rstat; unsigned long recent_rotated[2] = {0, 0}; unsigned long recent_scanned[2] = {0, 0}; for_each_online_node(nid) for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = mem_cgroup_zoneinfo(memcg, nid, zid); + rstat = &mz->lruvec.reclaim_stat; - recent_rotated[0] += - mz->reclaim_stat.recent_rotated[0]; - recent_rotated[1] += - mz->reclaim_stat.recent_rotated[1]; - recent_scanned[0] += - mz->reclaim_stat.recent_scanned[0]; - recent_scanned[1] += - mz->reclaim_stat.recent_scanned[1]; + recent_rotated[0] += rstat->recent_rotated[0]; + recent_rotated[1] += rstat->recent_rotated[1]; + recent_scanned[0] += rstat->recent_scanned[0]; + recent_scanned[1] += rstat->recent_scanned[1]; } - cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); - cb->fill(cb, "recent_rotated_file", recent_rotated[1]); - cb->fill(cb, "recent_scanned_anon", recent_scanned[0]); - cb->fill(cb, "recent_scanned_file", recent_scanned[1]); + seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); + seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); + seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); + seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]); } #endif @@ -4307,7 +4216,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) usage = mem_cgroup_usage(memcg, swap); /* - * current_threshold points to threshold just below usage. + * current_threshold points to threshold just below or equal to usage. * If it's not true, a threshold was crossed after last * call of __mem_cgroup_threshold(). */ @@ -4433,14 +4342,15 @@ static int mem_cgroup_usage_register_event(struct cgroup *cgrp, /* Find current threshold */ new->current_threshold = -1; for (i = 0; i < size; i++) { - if (new->entries[i].threshold < usage) { + if (new->entries[i].threshold <= usage) { /* * new->current_threshold will not be used until * rcu_assign_pointer(), so it's safe to increment * it here. */ ++new->current_threshold; - } + } else + break; } /* Free old spare buffer and save old primary buffer as spare */ @@ -4509,7 +4419,7 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, continue; new->entries[j] = thresholds->primary->entries[i]; - if (new->entries[j].threshold < usage) { + if (new->entries[j].threshold <= usage) { /* * new->current_threshold will not be used * until rcu_assign_pointer(), so it's safe to increment @@ -4623,22 +4533,6 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, return 0; } -#ifdef CONFIG_NUMA -static const struct file_operations mem_control_numa_stat_file_operations = { - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mem_control_numa_stat_open(struct inode *unused, struct file *file) -{ - struct cgroup *cont = file->f_dentry->d_parent->d_fsdata; - - file->f_op = &mem_control_numa_stat_file_operations; - return single_open(file, mem_control_numa_stat_show, cont); -} -#endif /* CONFIG_NUMA */ - #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { @@ -4694,7 +4588,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "stat", - .read_map = mem_control_stat_show, + .read_seq_string = mem_control_stat_show, }, { .name = "force_empty", @@ -4726,8 +4620,7 @@ static struct cftype mem_cgroup_files[] = { #ifdef CONFIG_NUMA { .name = "numa_stat", - .open = mem_control_numa_stat_open, - .mode = S_IRUGO, + .read_seq_string = mem_control_numa_stat_show, }, #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP @@ -4764,7 +4657,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { struct mem_cgroup_per_node *pn; struct mem_cgroup_per_zone *mz; - enum lru_list lru; int zone, tmp = node; /* * This routine is called against possible nodes. @@ -4782,8 +4674,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; - for_each_lru(lru) - INIT_LIST_HEAD(&mz->lruvec.lists[lru]); + lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]); mz->usage_in_excess = 0; mz->on_tree = false; mz->memcg = memcg; @@ -4826,23 +4717,40 @@ out_free: } /* - * Helpers for freeing a vzalloc()ed mem_cgroup by RCU, + * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU, * but in process context. The work_freeing structure is overlaid * on the rcu_freeing structure, which itself is overlaid on memsw. */ -static void vfree_work(struct work_struct *work) +static void free_work(struct work_struct *work) { struct mem_cgroup *memcg; + int size = sizeof(struct mem_cgroup); memcg = container_of(work, struct mem_cgroup, work_freeing); - vfree(memcg); + /* + * We need to make sure that (at least for now), the jump label + * destruction code runs outside of the cgroup lock. This is because + * get_online_cpus(), which is called from the static_branch update, + * can't be called inside the cgroup_lock. cpusets are the ones + * enforcing this dependency, so if they ever change, we might as well. + * + * schedule_work() will guarantee this happens. Be careful if you need + * to move this code around, and make sure it is outside + * the cgroup_lock. + */ + disarm_sock_keys(memcg); + if (size < PAGE_SIZE) + kfree(memcg); + else + vfree(memcg); } -static void vfree_rcu(struct rcu_head *rcu_head) + +static void free_rcu(struct rcu_head *rcu_head) { struct mem_cgroup *memcg; memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing); - INIT_WORK(&memcg->work_freeing, vfree_work); + INIT_WORK(&memcg->work_freeing, free_work); schedule_work(&memcg->work_freeing); } @@ -4868,10 +4776,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) free_mem_cgroup_per_zone_info(memcg, node); free_percpu(memcg->stat); - if (sizeof(struct mem_cgroup) < PAGE_SIZE) - kfree_rcu(memcg, rcu_freeing); - else - call_rcu(&memcg->rcu_freeing, vfree_rcu); + call_rcu(&memcg->rcu_freeing, free_rcu); } static void mem_cgroup_get(struct mem_cgroup *memcg) @@ -5135,7 +5040,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, return NULL; if (PageAnon(page)) { /* we don't move shared anon */ - if (!move_anon() || page_mapcount(page) > 2) + if (!move_anon()) return NULL; } else if (!move_file()) /* we ignore mapcount for file pages */ @@ -5146,32 +5051,37 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, return page; } +#ifdef CONFIG_SWAP static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, unsigned long addr, pte_t ptent, swp_entry_t *entry) { - int usage_count; struct page *page = NULL; swp_entry_t ent = pte_to_swp_entry(ptent); if (!move_anon() || non_swap_entry(ent)) return NULL; - usage_count = mem_cgroup_count_swap_user(ent, &page); - if (usage_count > 1) { /* we don't move shared anon */ - if (page) - put_page(page); - return NULL; - } + /* + * Because lookup_swap_cache() updates some statistics counter, + * we call find_get_page() with swapper_space directly. + */ + page = find_get_page(&swapper_space, ent.val); if (do_swap_account) entry->val = ent.val; return page; } +#else +static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, + unsigned long addr, pte_t ptent, swp_entry_t *entry) +{ + return NULL; +} +#endif static struct page *mc_handle_file_pte(struct vm_area_struct *vma, unsigned long addr, pte_t ptent, swp_entry_t *entry) { struct page *page = NULL; - struct inode *inode; struct address_space *mapping; pgoff_t pgoff; @@ -5180,7 +5090,6 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, if (!move_file()) return NULL; - inode = vma->vm_file->f_path.dentry->d_inode; mapping = vma->vm_file->f_mapping; if (pte_none(ptent)) pgoff = linear_page_index(vma, addr); @@ -5479,8 +5388,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, if (!isolate_lru_page(page)) { pc = lookup_page_cgroup(page); if (!mem_cgroup_move_account(page, HPAGE_PMD_NR, - pc, mc.from, mc.to, - false)) { + pc, mc.from, mc.to)) { mc.precharge -= HPAGE_PMD_NR; mc.moved_charge += HPAGE_PMD_NR; } @@ -5510,7 +5418,7 @@ retry: goto put; pc = lookup_page_cgroup(page); if (!mem_cgroup_move_account(page, 1, pc, - mc.from, mc.to, false)) { + mc.from, mc.to)) { mc.precharge--; /* we uncharge from mc.from later. */ mc.moved_charge++; @@ -5521,8 +5429,7 @@ put: /* get_mctgt_type() gets the page */ break; case MC_TARGET_SWAP: ent = target.ent; - if (!mem_cgroup_move_swap_account(ent, - mc.from, mc.to, false)) { + if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) { mc.precharge--; /* we fixup refcnts and charges later. */ mc.moved_swap++; @@ -5598,7 +5505,6 @@ static void mem_cgroup_move_task(struct cgroup *cont, if (mm) { if (mc.to) mem_cgroup_move_charge(mm); - put_swap_token(mm); mmput(mm); } if (mc.to) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c99ad4e6b88c..ab1e7145e290 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1388,16 +1388,16 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) */ if (!get_page_unless_zero(compound_head(p))) { if (PageHuge(p)) { - pr_info("get_any_page: %#lx free huge page\n", pfn); + pr_info("%s: %#lx free huge page\n", __func__, pfn); ret = dequeue_hwpoisoned_huge_page(compound_head(p)); } else if (is_free_buddy_page(p)) { - pr_info("get_any_page: %#lx free buddy page\n", pfn); + pr_info("%s: %#lx free buddy page\n", __func__, pfn); /* Set hwpoison bit while page is still isolated */ SetPageHWPoison(p); ret = 0; } else { - pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n", - pfn, p->flags); + pr_info("%s: %#lx: unknown zero refcount page type %lx\n", + __func__, pfn, p->flags); ret = -EIO; } } else { diff --git a/mm/memory.c b/mm/memory.c index e40f6759ba98..1b7dc662bf9f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2908,7 +2908,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { - grab_swap_token(mm); /* Contend for token _before_ read-in */ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma, address); if (!page) { @@ -2938,6 +2937,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, } locked = lock_page_or_retry(page, mm, flags); + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); if (!locked) { ret |= VM_FAULT_RETRY; @@ -3486,6 +3486,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); +retry: pgd = pgd_offset(mm, address); pud = pud_alloc(mm, pgd, address); if (!pud) @@ -3499,13 +3500,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd, flags); } else { pmd_t orig_pmd = *pmd; + int ret; + barrier(); if (pmd_trans_huge(orig_pmd)) { if (flags & FAULT_FLAG_WRITE && !pmd_write(orig_pmd) && - !pmd_trans_splitting(orig_pmd)) - return do_huge_pmd_wp_page(mm, vma, address, - pmd, orig_pmd); + !pmd_trans_splitting(orig_pmd)) { + ret = do_huge_pmd_wp_page(mm, vma, address, pmd, + orig_pmd); + /* + * If COW results in an oom, the huge pmd will + * have been split, so retry the fault on the + * pte for a smaller charge. + */ + if (unlikely(ret & VM_FAULT_OOM)) + goto retry; + return ret; + } return 0; } } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc898cb4fe8f..0d7e3ec8e0f3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -74,8 +74,7 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->end = start + size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res) < 0) { - printk("System RAM resource %llx - %llx cannot be added\n", - (unsigned long long)res->start, (unsigned long long)res->end); + printk("System RAM resource %pR cannot be added\n", res); kfree(res); res = NULL; } @@ -502,8 +501,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages) online_pages_range); if (ret) { mutex_unlock(&zonelists_mutex); - printk(KERN_DEBUG "online_pages %lx at %lx failed\n", - nr_pages, pfn); + printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n", + (unsigned long long) pfn << PAGE_SHIFT, + (((unsigned long long) pfn + nr_pages) + << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_ONLINE, &arg); unlock_memory_hotplug(); return ret; @@ -977,8 +978,9 @@ repeat: return 0; failed_removal: - printk(KERN_INFO "memory offlining %lx to %lx failed\n", - start_pfn, end_pfn); + printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n", + (unsigned long long) start_pfn << PAGE_SHIFT, + ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 88f9422b92e7..f15c1b24ca18 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -390,7 +390,7 @@ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask, { if (!pol) return; - if (!mpol_store_user_nodemask(pol) && step == 0 && + if (!mpol_store_user_nodemask(pol) && step == MPOL_REBIND_ONCE && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) return; @@ -950,8 +950,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, * * Returns the number of page that could not be moved. */ -int do_migrate_pages(struct mm_struct *mm, - const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) +int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, + const nodemask_t *to, int flags) { int busy = 0; int err; @@ -963,7 +963,7 @@ int do_migrate_pages(struct mm_struct *mm, down_read(&mm->mmap_sem); - err = migrate_vmas(mm, from_nodes, to_nodes, flags); + err = migrate_vmas(mm, from, to, flags); if (err) goto out; @@ -998,14 +998,34 @@ int do_migrate_pages(struct mm_struct *mm, * moved to an empty node, then there is nothing left worth migrating. */ - tmp = *from_nodes; + tmp = *from; while (!nodes_empty(tmp)) { int s,d; int source = -1; int dest = 0; for_each_node_mask(s, tmp) { - d = node_remap(s, *from_nodes, *to_nodes); + + /* + * do_migrate_pages() tries to maintain the relative + * node relationship of the pages established between + * threads and memory areas. + * + * However if the number of source nodes is not equal to + * the number of destination nodes we can not preserve + * this node relative relationship. In that case, skip + * copying memory from a node that is in the destination + * mask. + * + * Example: [2,3,4] -> [3,4,5] moves everything. + * [0-7] - > [3,4,5] moves only 0,1,2,6,7. + */ + + if ((nodes_weight(*from) != nodes_weight(*to)) && + (node_isset(s, *to))) + continue; + + d = node_remap(s, *from, *to); if (s == d) continue; @@ -1065,8 +1085,8 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist, { } -int do_migrate_pages(struct mm_struct *mm, - const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) +int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, + const nodemask_t *to, int flags) { return -ENOSYS; } diff --git a/mm/mmap.c b/mm/mmap.c index e8dcfc7de866..4a9c2a391e28 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1639,33 +1639,34 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma = NULL; - if (mm) { - /* Check the cache first. */ - /* (Cache hit rate is typically around 35%.) */ - vma = mm->mmap_cache; - if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { - struct rb_node * rb_node; - - rb_node = mm->mm_rb.rb_node; - vma = NULL; - - while (rb_node) { - struct vm_area_struct * vma_tmp; - - vma_tmp = rb_entry(rb_node, - struct vm_area_struct, vm_rb); - - if (vma_tmp->vm_end > addr) { - vma = vma_tmp; - if (vma_tmp->vm_start <= addr) - break; - rb_node = rb_node->rb_left; - } else - rb_node = rb_node->rb_right; - } - if (vma) - mm->mmap_cache = vma; + if (WARN_ON_ONCE(!mm)) /* Remove this in linux-3.6 */ + return NULL; + + /* Check the cache first. */ + /* (Cache hit rate is typically around 35%.) */ + vma = mm->mmap_cache; + if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { + struct rb_node *rb_node; + + rb_node = mm->mm_rb.rb_node; + vma = NULL; + + while (rb_node) { + struct vm_area_struct *vma_tmp; + + vma_tmp = rb_entry(rb_node, + struct vm_area_struct, vm_rb); + + if (vma_tmp->vm_end > addr) { + vma = vma_tmp; + if (vma_tmp->vm_start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; } + if (vma) + mm->mmap_cache = vma; } return vma; } diff --git a/mm/mmzone.c b/mm/mmzone.c index 7cf7b7ddc7c5..6830eab5bf09 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -86,3 +86,17 @@ int memmap_valid_within(unsigned long pfn, return 1; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + +void lruvec_init(struct lruvec *lruvec, struct zone *zone) +{ + enum lru_list lru; + + memset(lruvec, 0, sizeof(struct lruvec)); + + for_each_lru(lru) + INIT_LIST_HEAD(&lruvec->lists[lru]); + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR + lruvec->zone = zone; +#endif +} diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 1983fb1c7026..d23415c001bc 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -274,86 +274,85 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -/** - * __alloc_bootmem_node - allocate boot memory from a specific node - * @pgdat: node to allocate from - * @size: size of the request in bytes - * @align: alignment of the region - * @goal: preferred starting address of the region - * - * The goal is dropped if it can not be satisfied and the allocation will - * fall back to memory below @goal. - * - * Allocation may fall back to any node in the system if the specified node - * can not hold the requested memory. - * - * The function panics if the request can not be satisfied. - */ -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) +static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit) { void *ptr; - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - again: ptr = __alloc_memory_core_early(pgdat->node_id, size, align, - goal, -1ULL); + goal, limit); if (ptr) return ptr; ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, - goal, -1ULL); - if (!ptr && goal) { + goal, limit); + if (ptr) + return ptr; + + if (goal) { goal = 0; goto again; } - return ptr; + + return NULL; } -void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, +void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - return __alloc_bootmem_node(pgdat, size, align, goal); + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + + return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); } -#ifdef CONFIG_SPARSEMEM -/** - * alloc_bootmem_section - allocate boot memory from a specific section - * @size: size of the request in bytes - * @section_nr: sparse map section to allocate from - * - * Return NULL on failure. - */ -void * __init alloc_bootmem_section(unsigned long size, - unsigned long section_nr) +void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal, + unsigned long limit) { - unsigned long pfn, goal, limit; + void *ptr; - pfn = section_nr_to_pfn(section_nr); - goal = pfn << PAGE_SHIFT; - limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; + ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, limit); + if (ptr) + return ptr; - return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, - SMP_CACHE_BYTES, goal, limit); + printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); + panic("Out of memory"); + return NULL; } -#endif -void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, +/** + * __alloc_bootmem_node - allocate boot memory from a specific node + * @pgdat: node to allocate from + * @size: size of the request in bytes + * @align: alignment of the region + * @goal: preferred starting address of the region + * + * The goal is dropped if it can not be satisfied and the allocation will + * fall back to memory below @goal. + * + * Allocation may fall back to any node in the system if the specified node + * can not hold the requested memory. + * + * The function panics if the request can not be satisfied. + */ +void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - void *ptr; - if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - ptr = __alloc_memory_core_early(pgdat->node_id, size, align, - goal, -1ULL); - if (ptr) - return ptr; + return ___alloc_bootmem_node(pgdat, size, align, goal, 0); +} - return __alloc_bootmem_nopanic(size, align, goal); +void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) +{ + return __alloc_bootmem_node(pgdat, size, align, goal); } #ifndef ARCH_LOW_ADDRESS_LIMIT @@ -397,16 +396,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - void *ptr; - if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - ptr = __alloc_memory_core_early(pgdat->node_id, size, align, - goal, ARCH_LOW_ADDRESS_LIMIT); - if (ptr) - return ptr; - - return __alloc_memory_core_early(MAX_NUMNODES, size, align, - goal, ARCH_LOW_ADDRESS_LIMIT); + return ___alloc_bootmem_node(pgdat, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 9f09a1fde9f9..ed0e19677360 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -180,10 +180,10 @@ static bool oom_unkillable_task(struct task_struct *p, * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ -unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, - const nodemask_t *nodemask, unsigned long totalpages) +unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, + const nodemask_t *nodemask, unsigned long totalpages) { - long points; + unsigned long points; if (oom_unkillable_task(p, memcg, nodemask)) return 0; @@ -198,21 +198,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, } /* - * The memory controller may have a limit of 0 bytes, so avoid a divide - * by zero, if necessary. - */ - if (!totalpages) - totalpages = 1; - - /* * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. */ - points = get_mm_rss(p->mm) + p->mm->nr_ptes; - points += get_mm_counter(p->mm, MM_SWAPENTS); - - points *= 1000; - points /= totalpages; + points = get_mm_rss(p->mm) + p->mm->nr_ptes + + get_mm_counter(p->mm, MM_SWAPENTS); task_unlock(p); /* @@ -220,23 +210,20 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * implementation used by LSMs. */ if (has_capability_noaudit(p, CAP_SYS_ADMIN)) - points -= 30; + points -= 30 * totalpages / 1000; /* * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may * either completely disable oom killing or always prefer a certain * task. */ - points += p->signal->oom_score_adj; + points += p->signal->oom_score_adj * totalpages / 1000; /* - * Never return 0 for an eligible task that may be killed since it's - * possible that no single user task uses more than 0.1% of memory and - * no single admin tasks uses more than 3.0%. + * Never return 0 for an eligible task regardless of the root bonus and + * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). */ - if (points <= 0) - return 1; - return (points < 1000) ? points : 1000; + return points ? points : 1; } /* @@ -314,7 +301,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, { struct task_struct *g, *p; struct task_struct *chosen = NULL; - *ppoints = 0; + unsigned long chosen_points = 0; do_each_thread(g, p) { unsigned int points; @@ -354,7 +341,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (p == current) { chosen = p; - *ppoints = 1000; + chosen_points = ULONG_MAX; } else if (!force_kill) { /* * If this task is not being ptraced on exit, @@ -367,12 +354,13 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, } points = oom_badness(p, memcg, nodemask, totalpages); - if (points > *ppoints) { + if (points > chosen_points) { chosen = p; - *ppoints = points; + chosen_points = points; } } while_each_thread(g, p); + *ppoints = chosen_points * 1000 / totalpages; return chosen; } @@ -572,7 +560,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, } check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); - limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT; + limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1; read_lock(&tasklist_lock); p = select_bad_process(&points, limit, memcg, NULL, false); if (p && PTR_ERR(p) != -1UL) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bab8e3bc4202..6092f331b32e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes); int page_group_by_mobility_disabled __read_mostly; -static void set_pageblock_migratetype(struct page *page, int migratetype) +void set_pageblock_migratetype(struct page *page, int migratetype) { if (unlikely(page_group_by_mobility_disabled)) @@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone, return pages_moved; } -static int move_freepages_block(struct zone *zone, struct page *page, - int migratetype) +int move_freepages_block(struct zone *zone, struct page *page, + int migratetype) { unsigned long start_pfn, end_pfn; struct page *start_page, *end_page; @@ -4300,25 +4300,24 @@ static inline void setup_usemap(struct pglist_data *pgdat, #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE -/* Return a sensible default order for the pageblock size. */ -static inline int pageblock_default_order(void) -{ - if (HPAGE_SHIFT > PAGE_SHIFT) - return HUGETLB_PAGE_ORDER; - - return MAX_ORDER-1; -} - /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ -static inline void __init set_pageblock_order(unsigned int order) +static inline void __init set_pageblock_order(void) { + unsigned int order; + /* Check that pageblock_nr_pages has not already been setup */ if (pageblock_order) return; + if (HPAGE_SHIFT > PAGE_SHIFT) + order = HUGETLB_PAGE_ORDER; + else + order = MAX_ORDER - 1; + /* * Assume the largest contiguous order of interest is a huge page. - * This value may be variable depending on boot parameters on IA64 + * This value may be variable depending on boot parameters on IA64 and + * powerpc. */ pageblock_order = order; } @@ -4326,15 +4325,13 @@ static inline void __init set_pageblock_order(unsigned int order) /* * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order() - * and pageblock_default_order() are unused as pageblock_order is set - * at compile-time. See include/linux/pageblock-flags.h for the values of - * pageblock_order based on the kernel config + * is unused as pageblock_order is set at compile-time. See + * include/linux/pageblock-flags.h for the values of pageblock_order based on + * the kernel config */ -static inline int pageblock_default_order(unsigned int order) +static inline void set_pageblock_order(void) { - return MAX_ORDER-1; } -#define set_pageblock_order(x) do {} while (0) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ @@ -4361,7 +4358,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; - enum lru_list lru; size = zone_spanned_pages_in_node(nid, j, zones_size); realsize = size - zone_absent_pages_in_node(nid, j, @@ -4411,18 +4407,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone_pcp_init(zone); - for_each_lru(lru) - INIT_LIST_HEAD(&zone->lruvec.lists[lru]); - zone->reclaim_stat.recent_rotated[0] = 0; - zone->reclaim_stat.recent_rotated[1] = 0; - zone->reclaim_stat.recent_scanned[0] = 0; - zone->reclaim_stat.recent_scanned[1] = 0; + lruvec_init(&zone->lruvec, zone); zap_zone_vm_stats(zone); zone->flags = 0; if (!size) continue; - set_pageblock_order(pageblock_default_order()); + set_pageblock_order(); setup_usemap(pgdat, zone, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size, MEMMAP_EARLY); @@ -4815,7 +4806,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) find_zone_movable_pfns_for_nodes(); /* Print out the zone ranges */ - printk("Zone PFN ranges:\n"); + printk("Zone ranges:\n"); for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; @@ -4824,22 +4815,25 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) arch_zone_highest_possible_pfn[i]) printk(KERN_CONT "empty\n"); else - printk(KERN_CONT "%0#10lx -> %0#10lx\n", - arch_zone_lowest_possible_pfn[i], - arch_zone_highest_possible_pfn[i]); + printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n", + arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT, + (arch_zone_highest_possible_pfn[i] + << PAGE_SHIFT) - 1); } /* Print out the PFNs ZONE_MOVABLE begins at in each node */ - printk("Movable zone start PFN for each node\n"); + printk("Movable zone start for each node\n"); for (i = 0; i < MAX_NUMNODES; i++) { if (zone_movable_pfn[i]) - printk(" Node %d: %lu\n", i, zone_movable_pfn[i]); + printk(" Node %d: %#010lx\n", i, + zone_movable_pfn[i] << PAGE_SHIFT); } /* Print out the early_node_map[] */ - printk("Early memory PFN ranges\n"); + printk("Early memory node ranges\n"); for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) - printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn); + printk(" node %3d: [mem %#010lx-%#010lx]\n", nid, + start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); /* Initialise every node */ mminit_verify_pageflags_layout(); @@ -5657,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) .nr_migratepages = 0, .order = -1, .zone = page_zone(pfn_to_page(start)), - .sync = true, + .mode = COMPACT_SYNC, }; INIT_LIST_HEAD(&cc.migratepages); @@ -5938,7 +5932,7 @@ bool is_free_buddy_page(struct page *page) } #endif -static struct trace_print_flags pageflag_names[] = { +static const struct trace_print_flags pageflag_names[] = { {1UL << PG_locked, "locked" }, {1UL << PG_error, "error" }, {1UL << PG_referenced, "referenced" }, @@ -5973,7 +5967,9 @@ static struct trace_print_flags pageflag_names[] = { #ifdef CONFIG_MEMORY_FAILURE {1UL << PG_hwpoison, "hwpoison" }, #endif - {-1UL, NULL }, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + {1UL << PG_compound_lock, "compound_lock" }, +#endif }; static void dump_page_flags(unsigned long flags) @@ -5982,12 +5978,14 @@ static void dump_page_flags(unsigned long flags) unsigned long mask; int i; + BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS); + printk(KERN_ALERT "page flags: %#lx(", flags); /* remove zone id */ flags &= (1UL << NR_PAGEFLAGS) - 1; - for (i = 0; pageflag_names[i].name && flags; i++) { + for (i = 0; i < ARRAY_SIZE(pageflag_names) && flags; i++) { mask = pageflag_names[i].mask; if ((flags & mask) != mask) diff --git a/mm/readahead.c b/mm/readahead.c index cbcbb02f3e28..ea8f8fa21649 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -17,6 +17,8 @@ #include <linux/task_io_accounting_ops.h> #include <linux/pagevec.h> #include <linux/pagemap.h> +#include <linux/syscalls.h> +#include <linux/file.h> /* * Initialise a struct file's readahead state. Assumes that the caller has @@ -562,3 +564,41 @@ page_cache_async_readahead(struct address_space *mapping, ondemand_readahead(mapping, ra, filp, true, offset, req_size); } EXPORT_SYMBOL_GPL(page_cache_async_readahead); + +static ssize_t +do_readahead(struct address_space *mapping, struct file *filp, + pgoff_t index, unsigned long nr) +{ + if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) + return -EINVAL; + + force_page_cache_readahead(mapping, filp, index, nr); + return 0; +} + +SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) +{ + ssize_t ret; + struct file *file; + + ret = -EBADF; + file = fget(fd); + if (file) { + if (file->f_mode & FMODE_READ) { + struct address_space *mapping = file->f_mapping; + pgoff_t start = offset >> PAGE_CACHE_SHIFT; + pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; + unsigned long len = end - start + 1; + ret = do_readahead(mapping, file, start, len); + } + fput(file); + } + return ret; +} +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_readahead(long fd, loff_t offset, long count) +{ + return SYSC_readahead((int) fd, offset, (size_t) count); +} +SYSCALL_ALIAS(sys_readahead, SyS_readahead); +#endif diff --git a/mm/rmap.c b/mm/rmap.c index 5b5ad584ffb7..0f3b7cda2a24 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -755,12 +755,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, pte_unmap_unlock(pte, ptl); } - /* Pretend the page is referenced if the task has the - swap token and is in the middle of a page fault. */ - if (mm != current->mm && has_swap_token(mm) && - rwsem_is_locked(&mm->mmap_sem)) - referenced++; - (*mapcount)--; if (referenced) diff --git a/mm/shmem.c b/mm/shmem.c index be5af34a070d..d576b84d913c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -53,6 +53,7 @@ static struct vfsmount *shm_mnt; #include <linux/blkdev.h> #include <linux/pagevec.h> #include <linux/percpu_counter.h> +#include <linux/falloc.h> #include <linux/splice.h> #include <linux/security.h> #include <linux/swapops.h> @@ -83,12 +84,25 @@ struct shmem_xattr { char value[0]; }; +/* + * shmem_fallocate and shmem_writepage communicate via inode->i_private + * (with i_mutex making sure that it has only one user at a time): + * we would prefer not to enlarge the shmem inode just for that. + */ +struct shmem_falloc { + pgoff_t start; /* start of range currently being fallocated */ + pgoff_t next; /* the next page offset to be fallocated */ + pgoff_t nr_falloced; /* how many new pages have been fallocated */ + pgoff_t nr_unswapped; /* how often writepage refused to swap out */ +}; + /* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */ - SGP_WRITE, /* may exceed i_size, may allocate page */ + SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */ + SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; #ifdef CONFIG_TMPFS @@ -103,6 +117,9 @@ static unsigned long shmem_default_max_inodes(void) } #endif +static bool shmem_should_replace_page(struct page *page, gfp_t gfp); +static int shmem_replace_page(struct page **pagep, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index); static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); @@ -423,27 +440,31 @@ void shmem_unlock_mapping(struct address_space *mapping) /* * Remove range of pages and swap entries from radix tree, and free them. + * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. */ -void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) +static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, + bool unfalloc) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); - pgoff_t end = (lend >> PAGE_CACHE_SHIFT); + pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT; + unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1); + unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1); struct pagevec pvec; pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; pgoff_t index; int i; - BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); + if (lend == -1) + end = -1; /* unsigned, so actually very big */ pagevec_init(&pvec, 0); index = start; - while (index <= end) { + while (index < end) { pvec.nr = shmem_find_get_pages_and_swap(mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + min(end - index, (pgoff_t)PAGEVEC_SIZE), pvec.pages, indices); if (!pvec.nr) break; @@ -452,10 +473,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) struct page *page = pvec.pages[i]; index = indices[i]; - if (index > end) + if (index >= end) break; if (radix_tree_exceptional_entry(page)) { + if (unfalloc) + continue; nr_swaps_freed += !shmem_free_swap(mapping, index, page); continue; @@ -463,9 +486,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) if (!trylock_page(page)) continue; - if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); - truncate_inode_page(mapping, page); + if (!unfalloc || !PageUptodate(page)) { + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); + } } unlock_page(page); } @@ -476,30 +501,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) index++; } - if (partial) { + if (partial_start) { struct page *page = NULL; shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); if (page) { - zero_user_segment(page, partial, PAGE_CACHE_SIZE); + unsigned int top = PAGE_CACHE_SIZE; + if (start > end) { + top = partial_end; + partial_end = 0; + } + zero_user_segment(page, partial_start, top); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } + } + if (partial_end) { + struct page *page = NULL; + shmem_getpage(inode, end, &page, SGP_READ, NULL); + if (page) { + zero_user_segment(page, 0, partial_end); set_page_dirty(page); unlock_page(page); page_cache_release(page); } } + if (start >= end) + return; index = start; for ( ; ; ) { cond_resched(); pvec.nr = shmem_find_get_pages_and_swap(mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + min(end - index, (pgoff_t)PAGEVEC_SIZE), pvec.pages, indices); if (!pvec.nr) { - if (index == start) + if (index == start || unfalloc) break; index = start; continue; } - if (index == start && indices[0] > end) { + if ((index == start || unfalloc) && indices[0] >= end) { shmem_deswap_pagevec(&pvec); pagevec_release(&pvec); break; @@ -509,19 +551,23 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) struct page *page = pvec.pages[i]; index = indices[i]; - if (index > end) + if (index >= end) break; if (radix_tree_exceptional_entry(page)) { + if (unfalloc) + continue; nr_swaps_freed += !shmem_free_swap(mapping, index, page); continue; } lock_page(page); - if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); - truncate_inode_page(mapping, page); + if (!unfalloc || !PageUptodate(page)) { + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); + } } unlock_page(page); } @@ -535,7 +581,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) info->swapped -= nr_swaps_freed; shmem_recalc_inode(inode); spin_unlock(&info->lock); +} +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) +{ + shmem_undo_range(inode, lstart, lend, false); inode->i_ctime = inode->i_mtime = CURRENT_TIME; } EXPORT_SYMBOL_GPL(shmem_truncate_range); @@ -604,12 +654,13 @@ static void shmem_evict_inode(struct inode *inode) * If swap found in inode, free it and move page from swapcache to filecache. */ static int shmem_unuse_inode(struct shmem_inode_info *info, - swp_entry_t swap, struct page *page) + swp_entry_t swap, struct page **pagep) { struct address_space *mapping = info->vfs_inode.i_mapping; void *radswap; pgoff_t index; - int error; + gfp_t gfp; + int error = 0; radswap = swp_to_radix_entry(swap); index = radix_tree_locate_item(&mapping->page_tree, radswap); @@ -625,22 +676,37 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); + gfp = mapping_gfp_mask(mapping); + if (shmem_should_replace_page(*pagep, gfp)) { + mutex_unlock(&shmem_swaplist_mutex); + error = shmem_replace_page(pagep, gfp, info, index); + mutex_lock(&shmem_swaplist_mutex); + /* + * We needed to drop mutex to make that restrictive page + * allocation; but the inode might already be freed by now, + * and we cannot refer to inode or mapping or info to check. + * However, we do hold page lock on the PageSwapCache page, + * so can check if that still has our reference remaining. + */ + if (!page_swapcount(*pagep)) + error = -ENOENT; + } + /* * We rely on shmem_swaplist_mutex, not only to protect the swaplist, * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - error = shmem_add_to_page_cache(page, mapping, index, + if (!error) + error = shmem_add_to_page_cache(*pagep, mapping, index, GFP_NOWAIT, radswap); - /* which does mem_cgroup_uncharge_cache_page on error */ - if (error != -ENOMEM) { /* * Truncation and eviction use free_swap_and_cache(), which * only does trylock page: if we raced, best clean up here. */ - delete_from_swap_cache(page); - set_page_dirty(page); + delete_from_swap_cache(*pagep); + set_page_dirty(*pagep); if (!error) { spin_lock(&info->lock); info->swapped--; @@ -660,7 +726,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page) struct list_head *this, *next; struct shmem_inode_info *info; int found = 0; - int error; + int error = 0; + + /* + * There's a faint possibility that swap page was replaced before + * caller locked it: it will come back later with the right page. + */ + if (unlikely(!PageSwapCache(page))) + goto out; /* * Charge page using GFP_KERNEL while we can wait, before taking @@ -676,7 +749,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) list_for_each_safe(this, next, &shmem_swaplist) { info = list_entry(this, struct shmem_inode_info, swaplist); if (info->swapped) - found = shmem_unuse_inode(info, swap, page); + found = shmem_unuse_inode(info, swap, &page); else list_del_init(&info->swaplist); cond_resched(); @@ -685,8 +758,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page) } mutex_unlock(&shmem_swaplist_mutex); - if (!found) - mem_cgroup_uncharge_cache_page(page); if (found < 0) error = found; out: @@ -727,6 +798,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ goto redirty; } + + /* + * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC + * value into swapfile.c, the only way we can correctly account for a + * fallocated page arriving here is now to initialize it and write it. + * + * That's okay for a page already fallocated earlier, but if we have + * not yet completed the fallocation, then (a) we want to keep track + * of this page in case we have to undo it, and (b) it may not be a + * good idea to continue anyway, once we're pushing into swap. So + * reactivate the page, and let shmem_fallocate() quit when too many. + */ + if (!PageUptodate(page)) { + if (inode->i_private) { + struct shmem_falloc *shmem_falloc; + spin_lock(&inode->i_lock); + shmem_falloc = inode->i_private; + if (shmem_falloc && + index >= shmem_falloc->start && + index < shmem_falloc->next) + shmem_falloc->nr_unswapped++; + else + shmem_falloc = NULL; + spin_unlock(&inode->i_lock); + if (shmem_falloc) + goto redirty; + } + clear_highpage(page); + flush_dcache_page(page); + SetPageUptodate(page); + } + swap = get_swap_page(); if (!swap.val) goto redirty; @@ -856,6 +959,84 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) #endif /* + * When a page is moved from swapcache to shmem filecache (either by the + * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of + * shmem_unuse_inode()), it may have been read in earlier from swap, in + * ignorance of the mapping it belongs to. If that mapping has special + * constraints (like the gma500 GEM driver, which requires RAM below 4GB), + * we may need to copy to a suitable page before moving to filecache. + * + * In a future release, this may well be extended to respect cpuset and + * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); + * but for now it is a simple matter of zone. + */ +static bool shmem_should_replace_page(struct page *page, gfp_t gfp) +{ + return page_zonenum(page) > gfp_zone(gfp); +} + +static int shmem_replace_page(struct page **pagep, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) +{ + struct page *oldpage, *newpage; + struct address_space *swap_mapping; + pgoff_t swap_index; + int error; + + oldpage = *pagep; + swap_index = page_private(oldpage); + swap_mapping = page_mapping(oldpage); + + /* + * We have arrived here because our zones are constrained, so don't + * limit chance of success by further cpuset and node constraints. + */ + gfp &= ~GFP_CONSTRAINT_MASK; + newpage = shmem_alloc_page(gfp, info, index); + if (!newpage) + return -ENOMEM; + VM_BUG_ON(shmem_should_replace_page(newpage, gfp)); + + *pagep = newpage; + page_cache_get(newpage); + copy_highpage(newpage, oldpage); + + VM_BUG_ON(!PageLocked(oldpage)); + __set_page_locked(newpage); + VM_BUG_ON(!PageUptodate(oldpage)); + SetPageUptodate(newpage); + VM_BUG_ON(!PageSwapBacked(oldpage)); + SetPageSwapBacked(newpage); + VM_BUG_ON(!swap_index); + set_page_private(newpage, swap_index); + VM_BUG_ON(!PageSwapCache(oldpage)); + SetPageSwapCache(newpage); + + /* + * Our caller will very soon move newpage out of swapcache, but it's + * a nice clean interface for us to replace oldpage by newpage there. + */ + spin_lock_irq(&swap_mapping->tree_lock); + error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, + newpage); + __inc_zone_page_state(newpage, NR_FILE_PAGES); + __dec_zone_page_state(oldpage, NR_FILE_PAGES); + spin_unlock_irq(&swap_mapping->tree_lock); + BUG_ON(error); + + mem_cgroup_replace_page_cache(oldpage, newpage); + lru_cache_add_anon(newpage); + + ClearPageSwapCache(oldpage); + set_page_private(oldpage, 0); + + unlock_page(oldpage); + page_cache_release(oldpage); + page_cache_release(oldpage); + return 0; +} + +/* * shmem_getpage_gfp - find page in cache, or get from swap, or allocate * * If we allocate a new one we do not mark it dirty. That's up to the @@ -872,6 +1053,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, swp_entry_t swap; int error; int once = 0; + int alloced = 0; if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; @@ -883,19 +1065,21 @@ repeat: page = NULL; } - if (sgp != SGP_WRITE && + if (sgp != SGP_WRITE && sgp != SGP_FALLOC && ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { error = -EINVAL; goto failed; } + /* fallocated page? */ + if (page && !PageUptodate(page)) { + if (sgp != SGP_READ) + goto clear; + unlock_page(page); + page_cache_release(page); + page = NULL; + } if (page || (sgp == SGP_READ && !swap.val)) { - /* - * Once we can get the page lock, it must be uptodate: - * if there were an error in reading back from swap, - * the page would not be inserted into the filecache. - */ - BUG_ON(page && !PageUptodate(page)); *pagep = page; return 0; } @@ -923,19 +1107,20 @@ repeat: /* We have to do this with page locked to prevent races */ lock_page(page); + if (!PageSwapCache(page) || page->mapping) { + error = -EEXIST; /* try again */ + goto failed; + } if (!PageUptodate(page)) { error = -EIO; goto failed; } wait_on_page_writeback(page); - /* Someone may have already done it for us */ - if (page->mapping) { - if (page->mapping == mapping && - page->index == index) - goto done; - error = -EEXIST; - goto failed; + if (shmem_should_replace_page(page, gfp)) { + error = shmem_replace_page(&page, gfp, info, index); + if (error) + goto failed; } error = mem_cgroup_cache_charge(page, current->mm, @@ -991,19 +1176,36 @@ repeat: inode->i_blocks += BLOCKS_PER_PAGE; shmem_recalc_inode(inode); spin_unlock(&info->lock); + alloced = true; - clear_highpage(page); - flush_dcache_page(page); - SetPageUptodate(page); + /* + * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. + */ + if (sgp == SGP_FALLOC) + sgp = SGP_WRITE; +clear: + /* + * Let SGP_WRITE caller clear ends if write does not fill page; + * but SGP_FALLOC on a page fallocated earlier must initialize + * it now, lest undo on failure cancel our earlier guarantee. + */ + if (sgp != SGP_WRITE) { + clear_highpage(page); + flush_dcache_page(page); + SetPageUptodate(page); + } if (sgp == SGP_DIRTY) set_page_dirty(page); } -done: + /* Perhaps the file has been truncated since we checked */ - if (sgp != SGP_WRITE && + if (sgp != SGP_WRITE && sgp != SGP_FALLOC && ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { error = -EINVAL; - goto trunc; + if (alloced) + goto trunc; + else + goto failed; } *pagep = page; return 0; @@ -1012,6 +1214,7 @@ done: * Error recovery. */ trunc: + info = SHMEM_I(inode); ClearPageDirty(page); delete_from_page_cache(page); spin_lock(&info->lock); @@ -1019,6 +1222,7 @@ trunc: inode->i_blocks -= BLOCKS_PER_PAGE; spin_unlock(&info->lock); decused: + sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) percpu_counter_add(&sbinfo->used_blocks, -1); unacct: @@ -1204,6 +1408,14 @@ shmem_write_end(struct file *file, struct address_space *mapping, if (pos + copied > inode->i_size) i_size_write(inode, pos + copied); + if (!PageUptodate(page)) { + if (copied < PAGE_CACHE_SIZE) { + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + zero_user_segments(page, 0, from, + from + copied, PAGE_CACHE_SIZE); + } + SetPageUptodate(page); + } set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -1462,6 +1674,199 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, return error; } +/* + * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. + */ +static pgoff_t shmem_seek_hole_data(struct address_space *mapping, + pgoff_t index, pgoff_t end, int origin) +{ + struct page *page; + struct pagevec pvec; + pgoff_t indices[PAGEVEC_SIZE]; + bool done = false; + int i; + + pagevec_init(&pvec, 0); + pvec.nr = 1; /* start small: we may be there already */ + while (!done) { + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + pvec.nr, pvec.pages, indices); + if (!pvec.nr) { + if (origin == SEEK_DATA) + index = end; + break; + } + for (i = 0; i < pvec.nr; i++, index++) { + if (index < indices[i]) { + if (origin == SEEK_HOLE) { + done = true; + break; + } + index = indices[i]; + } + page = pvec.pages[i]; + if (page && !radix_tree_exceptional_entry(page)) { + if (!PageUptodate(page)) + page = NULL; + } + if (index >= end || + (page && origin == SEEK_DATA) || + (!page && origin == SEEK_HOLE)) { + done = true; + break; + } + } + shmem_deswap_pagevec(&pvec); + pagevec_release(&pvec); + pvec.nr = PAGEVEC_SIZE; + cond_resched(); + } + return index; +} + +static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin) +{ + struct address_space *mapping; + struct inode *inode; + pgoff_t start, end; + loff_t new_offset; + + if (origin != SEEK_DATA && origin != SEEK_HOLE) + return generic_file_llseek_size(file, offset, origin, + MAX_LFS_FILESIZE); + mapping = file->f_mapping; + inode = mapping->host; + mutex_lock(&inode->i_mutex); + /* We're holding i_mutex so we can access i_size directly */ + + if (offset < 0) + offset = -EINVAL; + else if (offset >= inode->i_size) + offset = -ENXIO; + else { + start = offset >> PAGE_CACHE_SHIFT; + end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + new_offset = shmem_seek_hole_data(mapping, start, end, origin); + new_offset <<= PAGE_CACHE_SHIFT; + if (new_offset > offset) { + if (new_offset < inode->i_size) + offset = new_offset; + else if (origin == SEEK_DATA) + offset = -ENXIO; + else + offset = inode->i_size; + } + } + + if (offset >= 0 && offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + mutex_unlock(&inode->i_mutex); + return offset; +} + +static long shmem_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct shmem_falloc shmem_falloc; + pgoff_t start, index, end; + int error; + + mutex_lock(&inode->i_mutex); + + if (mode & FALLOC_FL_PUNCH_HOLE) { + struct address_space *mapping = file->f_mapping; + loff_t unmap_start = round_up(offset, PAGE_SIZE); + loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; + + if ((u64)unmap_end > (u64)unmap_start) + unmap_mapping_range(mapping, unmap_start, + 1 + unmap_end - unmap_start, 0); + shmem_truncate_range(inode, offset, offset + len - 1); + /* No need to unmap again: hole-punching leaves COWed pages */ + error = 0; + goto out; + } + + /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ + error = inode_newsize_ok(inode, offset + len); + if (error) + goto out; + + start = offset >> PAGE_CACHE_SHIFT; + end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + /* Try to avoid a swapstorm if len is impossible to satisfy */ + if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { + error = -ENOSPC; + goto out; + } + + shmem_falloc.start = start; + shmem_falloc.next = start; + shmem_falloc.nr_falloced = 0; + shmem_falloc.nr_unswapped = 0; + spin_lock(&inode->i_lock); + inode->i_private = &shmem_falloc; + spin_unlock(&inode->i_lock); + + for (index = start; index < end; index++) { + struct page *page; + + /* + * Good, the fallocate(2) manpage permits EINTR: we may have + * been interrupted because we are using up too much memory. + */ + if (signal_pending(current)) + error = -EINTR; + else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) + error = -ENOMEM; + else + error = shmem_getpage(inode, index, &page, SGP_FALLOC, + NULL); + if (error) { + /* Remove the !PageUptodate pages we added */ + shmem_undo_range(inode, + (loff_t)start << PAGE_CACHE_SHIFT, + (loff_t)index << PAGE_CACHE_SHIFT, true); + goto undone; + } + + /* + * Inform shmem_writepage() how far we have reached. + * No need for lock or barrier: we have the page lock. + */ + shmem_falloc.next++; + if (!PageUptodate(page)) + shmem_falloc.nr_falloced++; + + /* + * If !PageUptodate, leave it that way so that freeable pages + * can be recognized if we need to rollback on error later. + * But set_page_dirty so that memory pressure will swap rather + * than free the pages we are allocating (and SGP_CACHE pages + * might still be clean: we now need to mark those dirty too). + */ + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + cond_resched(); + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) + i_size_write(inode, offset + len); + inode->i_ctime = CURRENT_TIME; +undone: + spin_lock(&inode->i_lock); + inode->i_private = NULL; + spin_unlock(&inode->i_lock); +out: + mutex_unlock(&inode->i_mutex); + return error; +} + static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) { struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); @@ -1665,6 +2070,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s kaddr = kmap_atomic(page); memcpy(kaddr, symname, len); kunmap_atomic(kaddr); + SetPageUptodate(page); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2270,6 +2676,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) } } sb->s_export_op = &shmem_export_ops; + sb->s_flags |= MS_NOSEC; #else sb->s_flags |= MS_NOUSER; #endif @@ -2364,7 +2771,7 @@ static const struct address_space_operations shmem_aops = { static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, #ifdef CONFIG_TMPFS - .llseek = generic_file_llseek, + .llseek = shmem_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = shmem_file_aio_read, @@ -2372,12 +2779,12 @@ static const struct file_operations shmem_file_operations = { .fsync = noop_fsync, .splice_read = shmem_file_splice_read, .splice_write = generic_file_splice_write, + .fallocate = shmem_fallocate, #endif }; static const struct inode_operations shmem_inode_operations = { .setattr = shmem_setattr, - .truncate_range = shmem_truncate_range, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, .getxattr = shmem_getxattr, diff --git a/mm/sparse.c b/mm/sparse.c index a8bc7d364deb..6a4bf9160e85 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -273,10 +273,10 @@ static unsigned long *__kmalloc_section_usemap(void) #ifdef CONFIG_MEMORY_HOTREMOVE static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, - unsigned long count) + unsigned long size) { - unsigned long section_nr; - + pg_data_t *host_pgdat; + unsigned long goal; /* * A page may contain usemaps for other sections preventing the * page being freed and making a section unremovable while @@ -287,8 +287,10 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. */ - section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); - return alloc_bootmem_section(usemap_size() * count, section_nr); + goal = __pa(pgdat) & PAGE_SECTION_MASK; + host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT)); + return __alloc_bootmem_node_nopanic(host_pgdat, size, + SMP_CACHE_BYTES, goal); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -332,9 +334,9 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) #else static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, - unsigned long count) + unsigned long size) { - return NULL; + return alloc_bootmem_node_nopanic(pgdat, size); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -352,13 +354,10 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, int size = usemap_size(); usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), - usemap_count); + size * usemap_count); if (!usemap) { - usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); - if (!usemap) { - printk(KERN_WARNING "%s: allocation failed\n", __func__); - return; - } + printk(KERN_WARNING "%s: allocation failed\n", __func__); + return; } for (pnum = pnum_begin; pnum < pnum_end; pnum++) { diff --git a/mm/swap.c b/mm/swap.c index 5c13f1338972..4e7e2ec67078 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -47,13 +47,15 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); static void __page_cache_release(struct page *page) { if (PageLRU(page)) { - unsigned long flags; struct zone *zone = page_zone(page); + struct lruvec *lruvec; + unsigned long flags; spin_lock_irqsave(&zone->lru_lock, flags); + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); - del_page_from_lru_list(zone, page, page_off_lru(page)); + del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } } @@ -82,6 +84,25 @@ static void put_compound_page(struct page *page) if (likely(page != page_head && get_page_unless_zero(page_head))) { unsigned long flags; + + /* + * THP can not break up slab pages so avoid taking + * compound_lock(). Slab performs non-atomic bit ops + * on page->flags for better performance. In particular + * slab_unlock() in slub used to be a hot path. It is + * still hot on arches that do not support + * this_cpu_cmpxchg_double(). + */ + if (PageSlab(page_head)) { + if (PageTail(page)) { + if (put_page_testzero(page_head)) + VM_BUG_ON(1); + + atomic_dec(&page->_mapcount); + goto skip_lock_tail; + } else + goto skip_lock; + } /* * page_head wasn't a dangling pointer but it * may not be a head page anymore by the time @@ -92,10 +113,10 @@ static void put_compound_page(struct page *page) if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); - VM_BUG_ON(PageHead(page_head)); +skip_lock: if (put_page_testzero(page_head)) __put_single_page(page_head); - out_put_single: +out_put_single: if (put_page_testzero(page)) __put_single_page(page); return; @@ -115,6 +136,8 @@ static void put_compound_page(struct page *page) VM_BUG_ON(atomic_read(&page_head->_count) <= 0); VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); + +skip_lock_tail: if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); @@ -162,6 +185,18 @@ bool __get_page_tail(struct page *page) struct page *page_head = compound_trans_head(page); if (likely(page != page_head && get_page_unless_zero(page_head))) { + + /* Ref to put_compound_page() comment. */ + if (PageSlab(page_head)) { + if (likely(PageTail(page))) { + __get_page_tail_foll(page, false); + return true; + } else { + put_page(page_head); + return false; + } + } + /* * page_head wasn't a dangling pointer but it * may not be a head page anymore by the time @@ -202,11 +237,12 @@ void put_pages_list(struct list_head *pages) EXPORT_SYMBOL(put_pages_list); static void pagevec_lru_move_fn(struct pagevec *pvec, - void (*move_fn)(struct page *page, void *arg), - void *arg) + void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), + void *arg) { int i; struct zone *zone = NULL; + struct lruvec *lruvec; unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { @@ -220,7 +256,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, spin_lock_irqsave(&zone->lru_lock, flags); } - (*move_fn)(page, arg); + lruvec = mem_cgroup_page_lruvec(page, zone); + (*move_fn)(page, lruvec, arg); } if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); @@ -228,16 +265,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, pagevec_reinit(pvec); } -static void pagevec_move_tail_fn(struct page *page, void *arg) +static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, + void *arg) { int *pgmoved = arg; if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { enum lru_list lru = page_lru_base_type(page); - struct lruvec *lruvec; - - lruvec = mem_cgroup_lru_move_lists(page_zone(page), - page, lru, lru); list_move_tail(&page->lru, &lruvec->lists[lru]); (*pgmoved)++; } @@ -276,41 +310,30 @@ void rotate_reclaimable_page(struct page *page) } } -static void update_page_reclaim_stat(struct zone *zone, struct page *page, +static void update_page_reclaim_stat(struct lruvec *lruvec, int file, int rotated) { - struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; - struct zone_reclaim_stat *memcg_reclaim_stat; - - memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; reclaim_stat->recent_scanned[file]++; if (rotated) reclaim_stat->recent_rotated[file]++; - - if (!memcg_reclaim_stat) - return; - - memcg_reclaim_stat->recent_scanned[file]++; - if (rotated) - memcg_reclaim_stat->recent_rotated[file]++; } -static void __activate_page(struct page *page, void *arg) +static void __activate_page(struct page *page, struct lruvec *lruvec, + void *arg) { - struct zone *zone = page_zone(page); - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int file = page_is_file_cache(page); int lru = page_lru_base_type(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); SetPageActive(page); lru += LRU_ACTIVE; - add_page_to_lru_list(zone, page, lru); - __count_vm_event(PGACTIVATE); + add_page_to_lru_list(page, lruvec, lru); - update_page_reclaim_stat(zone, page, file, 1); + __count_vm_event(PGACTIVATE); + update_page_reclaim_stat(lruvec, file, 1); } } @@ -347,7 +370,7 @@ void activate_page(struct page *page) struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); - __activate_page(page, NULL); + __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL); spin_unlock_irq(&zone->lru_lock); } #endif @@ -414,11 +437,13 @@ void lru_cache_add_lru(struct page *page, enum lru_list lru) void add_page_to_unevictable_list(struct page *page) { struct zone *zone = page_zone(page); + struct lruvec *lruvec; spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); SetPageUnevictable(page); SetPageLRU(page); - add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); + add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); spin_unlock_irq(&zone->lru_lock); } @@ -443,11 +468,11 @@ void add_page_to_unevictable_list(struct page *page) * be write it out by flusher threads as this is much more effective * than the single-page writeout from reclaim. */ -static void lru_deactivate_fn(struct page *page, void *arg) +static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, + void *arg) { int lru, file; bool active; - struct zone *zone = page_zone(page); if (!PageLRU(page)) return; @@ -460,13 +485,13 @@ static void lru_deactivate_fn(struct page *page, void *arg) return; active = PageActive(page); - file = page_is_file_cache(page); lru = page_lru_base_type(page); - del_page_from_lru_list(zone, page, lru + active); + + del_page_from_lru_list(page, lruvec, lru + active); ClearPageActive(page); ClearPageReferenced(page); - add_page_to_lru_list(zone, page, lru); + add_page_to_lru_list(page, lruvec, lru); if (PageWriteback(page) || PageDirty(page)) { /* @@ -476,19 +501,17 @@ static void lru_deactivate_fn(struct page *page, void *arg) */ SetPageReclaim(page); } else { - struct lruvec *lruvec; /* * The page's writeback ends up during pagevec * We moves tha page into tail of inactive. */ - lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru); list_move_tail(&page->lru, &lruvec->lists[lru]); __count_vm_event(PGROTATED); } if (active) __count_vm_event(PGDEACTIVATE); - update_page_reclaim_stat(zone, page, file, 0); + update_page_reclaim_stat(lruvec, file, 0); } /* @@ -588,6 +611,7 @@ void release_pages(struct page **pages, int nr, int cold) int i; LIST_HEAD(pages_to_free); struct zone *zone = NULL; + struct lruvec *lruvec; unsigned long uninitialized_var(flags); for (i = 0; i < nr; i++) { @@ -615,9 +639,11 @@ void release_pages(struct page **pages, int nr, int cold) zone = pagezone; spin_lock_irqsave(&zone->lru_lock, flags); } + + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); - del_page_from_lru_list(zone, page, page_off_lru(page)); + del_page_from_lru_list(page, lruvec, page_off_lru(page)); } list_add(&page->lru, &pages_to_free); @@ -649,8 +675,8 @@ EXPORT_SYMBOL(__pagevec_release); #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* used by __split_huge_page_refcount() */ -void lru_add_page_tail(struct zone* zone, - struct page *page, struct page *page_tail) +void lru_add_page_tail(struct page *page, struct page *page_tail, + struct lruvec *lruvec) { int uninitialized_var(active); enum lru_list lru; @@ -659,7 +685,8 @@ void lru_add_page_tail(struct zone* zone, VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); VM_BUG_ON(PageLRU(page_tail)); - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock)); + VM_BUG_ON(NR_CPUS != 1 && + !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); SetPageLRU(page_tail); @@ -688,20 +715,20 @@ void lru_add_page_tail(struct zone* zone, * Use the standard add function to put page_tail on the list, * but then correct its position so they all end up in order. */ - add_page_to_lru_list(zone, page_tail, lru); + add_page_to_lru_list(page_tail, lruvec, lru); list_head = page_tail->lru.prev; list_move_tail(&page_tail->lru, list_head); } if (!PageUnevictable(page)) - update_page_reclaim_stat(zone, page_tail, file, active); + update_page_reclaim_stat(lruvec, file, active); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static void __pagevec_lru_add_fn(struct page *page, void *arg) +static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, + void *arg) { enum lru_list lru = (enum lru_list)arg; - struct zone *zone = page_zone(page); int file = is_file_lru(lru); int active = is_active_lru(lru); @@ -712,8 +739,8 @@ static void __pagevec_lru_add_fn(struct page *page, void *arg) SetPageLRU(page); if (active) SetPageActive(page); - add_page_to_lru_list(zone, page, lru); - update_page_reclaim_stat(zone, page, file, active); + add_page_to_lru_list(page, lruvec, lru); + update_page_reclaim_stat(lruvec, file, active); } /* diff --git a/mm/swapfile.c b/mm/swapfile.c index fafc26d1b1dc..457b10baef59 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -601,7 +601,7 @@ void swapcache_free(swp_entry_t entry, struct page *page) * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. */ -static inline int page_swapcount(struct page *page) +int page_swapcount(struct page *page) { int count = 0; struct swap_info_struct *p; @@ -717,37 +717,6 @@ int free_swap_and_cache(swp_entry_t entry) return p != NULL; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_count_swap_user - count the user of a swap entry - * @ent: the swap entry to be checked - * @pagep: the pointer for the swap cache page of the entry to be stored - * - * Returns the number of the user of the swap entry. The number is valid only - * for swaps of anonymous pages. - * If the entry is found on swap cache, the page is stored to pagep with - * refcount of it being incremented. - */ -int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) -{ - struct page *page; - struct swap_info_struct *p; - int count = 0; - - page = find_get_page(&swapper_space, ent.val); - if (page) - count += page_mapcount(page); - p = swap_info_get(ent); - if (p) { - count += swap_count(p->swap_map[swp_offset(ent)]); - spin_unlock(&swap_lock); - } - - *pagep = page; - return count; -} -#endif - #ifdef CONFIG_HIBERNATION /* * Find the swap type that corresponds to given device (if any). diff --git a/mm/thrash.c b/mm/thrash.c deleted file mode 100644 index 57ad495dbd54..000000000000 --- a/mm/thrash.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * mm/thrash.c - * - * Copyright (C) 2004, Red Hat, Inc. - * Copyright (C) 2004, Rik van Riel <riel@redhat.com> - * Released under the GPL, see the file COPYING for details. - * - * Simple token based thrashing protection, using the algorithm - * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html - * - * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com> - * Improved algorithm to pass token: - * Each task has a priority which is incremented if it contended - * for the token in an interval less than its previous attempt. - * If the token is acquired, that task's priority is boosted to prevent - * the token from bouncing around too often and to let the task make - * some progress in its execution. - */ - -#include <linux/jiffies.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/swap.h> -#include <linux/memcontrol.h> - -#include <trace/events/vmscan.h> - -#define TOKEN_AGING_INTERVAL (0xFF) - -static DEFINE_SPINLOCK(swap_token_lock); -struct mm_struct *swap_token_mm; -static struct mem_cgroup *swap_token_memcg; - -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) -{ - struct mem_cgroup *memcg; - - memcg = try_get_mem_cgroup_from_mm(mm); - if (memcg) - css_put(mem_cgroup_css(memcg)); - - return memcg; -} -#else -static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) -{ - return NULL; -} -#endif - -void grab_swap_token(struct mm_struct *mm) -{ - int current_interval; - unsigned int old_prio = mm->token_priority; - static unsigned int global_faults; - static unsigned int last_aging; - - global_faults++; - - current_interval = global_faults - mm->faultstamp; - - if (!spin_trylock(&swap_token_lock)) - return; - - /* First come first served */ - if (!swap_token_mm) - goto replace_token; - - /* - * Usually, we don't need priority aging because long interval faults - * makes priority decrease quickly. But there is one exception. If the - * token owner task is sleeping, it never make long interval faults. - * Thus, we need a priority aging mechanism instead. The requirements - * of priority aging are - * 1) An aging interval is reasonable enough long. Too short aging - * interval makes quick swap token lost and decrease performance. - * 2) The swap token owner task have to get priority aging even if - * it's under sleep. - */ - if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) { - swap_token_mm->token_priority /= 2; - last_aging = global_faults; - } - - if (mm == swap_token_mm) { - mm->token_priority += 2; - goto update_priority; - } - - if (current_interval < mm->last_interval) - mm->token_priority++; - else { - if (likely(mm->token_priority > 0)) - mm->token_priority--; - } - - /* Check if we deserve the token */ - if (mm->token_priority > swap_token_mm->token_priority) - goto replace_token; - -update_priority: - trace_update_swap_token_priority(mm, old_prio, swap_token_mm); - -out: - mm->faultstamp = global_faults; - mm->last_interval = current_interval; - spin_unlock(&swap_token_lock); - return; - -replace_token: - mm->token_priority += 2; - trace_replace_swap_token(swap_token_mm, mm); - swap_token_mm = mm; - swap_token_memcg = swap_token_memcg_from_mm(mm); - last_aging = global_faults; - goto out; -} - -/* Called on process exit. */ -void __put_swap_token(struct mm_struct *mm) -{ - spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) { - trace_put_swap_token(swap_token_mm); - swap_token_mm = NULL; - swap_token_memcg = NULL; - } - spin_unlock(&swap_token_lock); -} - -static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b) -{ - if (!a) - return true; - if (!b) - return true; - if (a == b) - return true; - return false; -} - -void disable_swap_token(struct mem_cgroup *memcg) -{ - /* memcg reclaim don't disable unrelated mm token. */ - if (match_memcg(memcg, swap_token_memcg)) { - spin_lock(&swap_token_lock); - if (match_memcg(memcg, swap_token_memcg)) { - trace_disable_swap_token(swap_token_mm); - swap_token_mm = NULL; - swap_token_memcg = NULL; - } - spin_unlock(&swap_token_lock); - } -} diff --git a/mm/truncate.c b/mm/truncate.c index 61a183b89df6..75801acdaac7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -602,31 +602,6 @@ int vmtruncate(struct inode *inode, loff_t newsize) } EXPORT_SYMBOL(vmtruncate); -int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend) -{ - struct address_space *mapping = inode->i_mapping; - loff_t holebegin = round_up(lstart, PAGE_SIZE); - loff_t holelen = 1 + lend - holebegin; - - /* - * If the underlying filesystem is not going to provide - * a way to truncate a range of blocks (punch a hole) - - * we should return failure right now. - */ - if (!inode->i_op->truncate_range) - return -ENOSYS; - - mutex_lock(&inode->i_mutex); - inode_dio_wait(inode); - unmap_mapping_range(mapping, holebegin, holelen, 1); - inode->i_op->truncate_range(inode, lstart, lend); - /* unmap again to remove racily COWed private pages */ - unmap_mapping_range(mapping, holebegin, holelen, 1); - mutex_unlock(&inode->i_mutex); - - return 0; -} - /** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 94dff883b449..2aad49981b57 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1185,9 +1185,10 @@ void __init vmalloc_init(void) /* Import existing vmlist entries. */ for (tmp = vmlist; tmp; tmp = tmp->next) { va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); - va->flags = tmp->flags | VM_VM_AREA; + va->flags = VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; + va->vm = tmp; __insert_vmap_area(va); } @@ -2375,8 +2376,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, return NULL; } - vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL); - vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL); + vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL); + vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL); if (!vas || !vms) goto err_free2; diff --git a/mm/vmscan.c b/mm/vmscan.c index 33dc256033b5..eeb3bc9d1d36 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -53,24 +53,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/vmscan.h> -/* - * reclaim_mode determines how the inactive list is shrunk - * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages - * RECLAIM_MODE_ASYNC: Do not block - * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback - * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference - * page from the LRU and reclaim all pages within a - * naturally aligned range - * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of - * order-0 pages and then compact the zone - */ -typedef unsigned __bitwise__ reclaim_mode_t; -#define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u) -#define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u) -#define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u) -#define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u) -#define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u) - struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; @@ -96,11 +78,8 @@ struct scan_control { int order; - /* - * Intend to reclaim enough continuous memory rather than reclaim - * enough amount of memory. i.e, mode for high order allocation. - */ - reclaim_mode_t reclaim_mode; + /* Scan (total_size >> priority) pages at once */ + int priority; /* * The memory cgroup that hit its limit and as a result is the @@ -115,11 +94,6 @@ struct scan_control { nodemask_t *nodemask; }; -struct mem_cgroup_zone { - struct mem_cgroup *mem_cgroup; - struct zone *zone; -}; - #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) #ifdef ARCH_HAS_PREFETCH @@ -164,44 +138,21 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } - -static bool scanning_global_lru(struct mem_cgroup_zone *mz) -{ - return !mz->mem_cgroup; -} #else static bool global_reclaim(struct scan_control *sc) { return true; } - -static bool scanning_global_lru(struct mem_cgroup_zone *mz) -{ - return true; -} #endif -static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) -{ - if (!scanning_global_lru(mz)) - return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone); - - return &mz->zone->reclaim_stat; -} - -static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, - enum lru_list lru) +static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) { - if (!scanning_global_lru(mz)) - return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, - zone_to_nid(mz->zone), - zone_idx(mz->zone), - BIT(lru)); + if (!mem_cgroup_disabled()) + return mem_cgroup_get_lru_size(lruvec, lru); - return zone_page_state(mz->zone, NR_LRU_BASE + lru); + return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); } - /* * Add a shrinker callback to be called from the vm */ @@ -364,39 +315,6 @@ out: return ret; } -static void set_reclaim_mode(int priority, struct scan_control *sc, - bool sync) -{ - reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC; - - /* - * Initially assume we are entering either lumpy reclaim or - * reclaim/compaction.Depending on the order, we will either set the - * sync mode or just reclaim order-0 pages later. - */ - if (COMPACTION_BUILD) - sc->reclaim_mode = RECLAIM_MODE_COMPACTION; - else - sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM; - - /* - * Avoid using lumpy reclaim or reclaim/compaction if possible by - * restricting when its set to either costly allocations or when - * under memory pressure - */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - sc->reclaim_mode |= syncmode; - else if (sc->order && priority < DEF_PRIORITY - 2) - sc->reclaim_mode |= syncmode; - else - sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; -} - -static void reset_reclaim_mode(struct scan_control *sc) -{ - sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; -} - static inline int is_page_cache_freeable(struct page *page) { /* @@ -416,10 +334,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi, return 1; if (bdi == current->backing_dev_info) return 1; - - /* lumpy reclaim for hugepage often need a lot of write */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - return 1; return 0; } @@ -523,8 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, /* synchronous write or broken a_ops? */ ClearPageReclaim(page); } - trace_mm_vmscan_writepage(page, - trace_reclaim_flags(page, sc->reclaim_mode)); + trace_mm_vmscan_writepage(page, trace_reclaim_flags(page)); inc_zone_page_state(page, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } @@ -701,19 +614,15 @@ enum page_references { }; static enum page_references page_check_references(struct page *page, - struct mem_cgroup_zone *mz, struct scan_control *sc) { int referenced_ptes, referenced_page; unsigned long vm_flags; - referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); + referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, + &vm_flags); referenced_page = TestClearPageReferenced(page); - /* Lumpy reclaim - ignore references */ - if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) - return PAGEREF_RECLAIM; - /* * Mlock lost the isolation race with us. Let try_to_unmap() * move the page to the unevictable list. @@ -722,7 +631,7 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; if (referenced_ptes) { - if (PageAnon(page)) + if (PageSwapBacked(page)) return PAGEREF_ACTIVATE; /* * All mapped pages start out with page table @@ -763,9 +672,8 @@ static enum page_references page_check_references(struct page *page, * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct mem_cgroup_zone *mz, + struct zone *zone, struct scan_control *sc, - int priority, unsigned long *ret_nr_dirty, unsigned long *ret_nr_writeback) { @@ -794,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != mz->zone); + VM_BUG_ON(page_zone(page) != zone); sc->nr_scanned++; @@ -813,22 +721,11 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageWriteback(page)) { nr_writeback++; - /* - * Synchronous reclaim cannot queue pages for - * writeback due to the possibility of stack overflow - * but if it encounters a page under writeback, wait - * for the IO to complete. - */ - if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) && - may_enter_fs) - wait_on_page_writeback(page); - else { - unlock_page(page); - goto keep_lumpy; - } + unlock_page(page); + goto keep; } - references = page_check_references(page, mz, sc); + references = page_check_references(page, sc); switch (references) { case PAGEREF_ACTIVATE: goto activate_locked; @@ -879,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * unless under significant pressure. */ if (page_is_file_cache(page) && - (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { + (!current_is_kswapd() || + sc->priority >= DEF_PRIORITY - 2)) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -908,7 +806,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto activate_locked; case PAGE_SUCCESS: if (PageWriteback(page)) - goto keep_lumpy; + goto keep; if (PageDirty(page)) goto keep; @@ -994,7 +892,6 @@ cull_mlocked: try_to_free_swap(page); unlock_page(page); putback_lru_page(page); - reset_reclaim_mode(sc); continue; activate_locked: @@ -1007,8 +904,6 @@ activate_locked: keep_locked: unlock_page(page); keep: - reset_reclaim_mode(sc); -keep_lumpy: list_add(&page->lru, &ret_pages); VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } @@ -1020,7 +915,7 @@ keep_lumpy: * will encounter the same problem */ if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) - zone_set_flag(mz->zone, ZONE_CONGESTED); + zone_set_flag(zone, ZONE_CONGESTED); free_hot_cold_page_list(&free_pages, 1); @@ -1041,34 +936,15 @@ keep_lumpy: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode) { - bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. */ if (!PageLRU(page)) return ret; - all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == - (ISOLATE_ACTIVE|ISOLATE_INACTIVE); - - /* - * When checking the active state, we need to be sure we are - * dealing with comparible boolean values. Take the logical not - * of each. - */ - if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) - return ret; - - if (!all_lru_mode && !!page_is_file_cache(page) != file) - return ret; - - /* - * When this function is being called for lumpy reclaim, we - * initially look into all LRU pages, active, inactive and - * unevictable; only give shrink_page_list evictable pages. - */ + /* Do not give back unevictable pages for compaction */ if (PageUnevictable(page)) return ret; @@ -1135,54 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) * Appropriate locks must be held before calling this function. * * @nr_to_scan: The number of pages to look through on the list. - * @mz: The mem_cgroup_zone to pull pages from. + * @lruvec: The LRU vector to pull pages from. * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session * @mode: One of the LRU isolation modes - * @active: True [1] if isolating active pages - * @file: True [1] if isolating file [!anon] pages + * @lru: LRU list id for isolating * * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, struct list_head *dst, + struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, - isolate_mode_t mode, int active, int file) + isolate_mode_t mode, enum lru_list lru) { - struct lruvec *lruvec; - struct list_head *src; + struct list_head *src = &lruvec->lists[lru]; unsigned long nr_taken = 0; - unsigned long nr_lumpy_taken = 0; - unsigned long nr_lumpy_dirty = 0; - unsigned long nr_lumpy_failed = 0; unsigned long scan; - int lru = LRU_BASE; - - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); - if (active) - lru += LRU_ACTIVE; - if (file) - lru += LRU_FILE; - src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; - unsigned long pfn; - unsigned long end_pfn; - unsigned long page_pfn; - int zone_id; + int nr_pages; page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); VM_BUG_ON(!PageLRU(page)); - switch (__isolate_lru_page(page, mode, file)) { + switch (__isolate_lru_page(page, mode)) { case 0: - mem_cgroup_lru_del(page); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_move(&page->lru, dst); - nr_taken += hpage_nr_pages(page); + nr_taken += nr_pages; break; case -EBUSY: @@ -1193,93 +1054,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, default: BUG(); } - - if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)) - continue; - - /* - * Attempt to take all pages in the order aligned region - * surrounding the tag page. Only take those pages of - * the same active state as that tag page. We may safely - * round the target page pfn down to the requested order - * as the mem_map is guaranteed valid out to MAX_ORDER, - * where that page is in a different zone we will detect - * it from its zone id and abort this block scan. - */ - zone_id = page_zone_id(page); - page_pfn = page_to_pfn(page); - pfn = page_pfn & ~((1 << sc->order) - 1); - end_pfn = pfn + (1 << sc->order); - for (; pfn < end_pfn; pfn++) { - struct page *cursor_page; - - /* The target page is in the block, ignore it. */ - if (unlikely(pfn == page_pfn)) - continue; - - /* Avoid holes within the zone. */ - if (unlikely(!pfn_valid_within(pfn))) - break; - - cursor_page = pfn_to_page(pfn); - - /* Check that we have not crossed a zone boundary. */ - if (unlikely(page_zone_id(cursor_page) != zone_id)) - break; - - /* - * If we don't have enough swap space, reclaiming of - * anon page which don't already have a swap slot is - * pointless. - */ - if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) && - !PageSwapCache(cursor_page)) - break; - - if (__isolate_lru_page(cursor_page, mode, file) == 0) { - unsigned int isolated_pages; - - mem_cgroup_lru_del(cursor_page); - list_move(&cursor_page->lru, dst); - isolated_pages = hpage_nr_pages(cursor_page); - nr_taken += isolated_pages; - nr_lumpy_taken += isolated_pages; - if (PageDirty(cursor_page)) - nr_lumpy_dirty += isolated_pages; - scan++; - pfn += isolated_pages - 1; - } else { - /* - * Check if the page is freed already. - * - * We can't use page_count() as that - * requires compound_head and we don't - * have a pin on the page here. If a - * page is tail, we may or may not - * have isolated the head, so assume - * it's not free, it'd be tricky to - * track the head status without a - * page pin. - */ - if (!PageTail(cursor_page) && - !atomic_read(&cursor_page->_count)) - continue; - break; - } - } - - /* If we break out of the loop above, lumpy reclaim failed */ - if (pfn < end_pfn) - nr_lumpy_failed++; } *nr_scanned = scan; - - trace_mm_vmscan_lru_isolate(sc->order, - nr_to_scan, scan, - nr_taken, - nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, - mode, file); + trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, + nr_taken, mode, is_file_lru(lru)); return nr_taken; } @@ -1316,15 +1095,16 @@ int isolate_lru_page(struct page *page) if (PageLRU(page)) { struct zone *zone = page_zone(page); + struct lruvec *lruvec; spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); if (PageLRU(page)) { int lru = page_lru(page); - ret = 0; get_page(page); ClearPageLRU(page); - - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); + ret = 0; } spin_unlock_irq(&zone->lru_lock); } @@ -1357,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file, } static noinline_for_stack void -putback_inactive_pages(struct mem_cgroup_zone *mz, - struct list_head *page_list) +putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) { - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); - struct zone *zone = mz->zone; + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; + struct zone *zone = lruvec_zone(lruvec); LIST_HEAD(pages_to_free); /* @@ -1379,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); continue; } + + lruvec = mem_cgroup_page_lruvec(page, zone); + SetPageLRU(page); lru = page_lru(page); - add_page_to_lru_list(zone, page, lru); + add_page_to_lru_list(page, lruvec, lru); + if (is_active_lru(lru)) { int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); @@ -1390,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1407,112 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, list_splice(&pages_to_free, page_list); } -static noinline_for_stack void -update_isolated_counts(struct mem_cgroup_zone *mz, - struct list_head *page_list, - unsigned long *nr_anon, - unsigned long *nr_file) -{ - struct zone *zone = mz->zone; - unsigned int count[NR_LRU_LISTS] = { 0, }; - unsigned long nr_active = 0; - struct page *page; - int lru; - - /* - * Count pages and clear active flags - */ - list_for_each_entry(page, page_list, lru) { - int numpages = hpage_nr_pages(page); - lru = page_lru_base_type(page); - if (PageActive(page)) { - lru += LRU_ACTIVE; - ClearPageActive(page); - nr_active += numpages; - } - count[lru] += numpages; - } - - preempt_disable(); - __count_vm_events(PGDEACTIVATE, nr_active); - - __mod_zone_page_state(zone, NR_ACTIVE_FILE, - -count[LRU_ACTIVE_FILE]); - __mod_zone_page_state(zone, NR_INACTIVE_FILE, - -count[LRU_INACTIVE_FILE]); - __mod_zone_page_state(zone, NR_ACTIVE_ANON, - -count[LRU_ACTIVE_ANON]); - __mod_zone_page_state(zone, NR_INACTIVE_ANON, - -count[LRU_INACTIVE_ANON]); - - *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - - __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file); - preempt_enable(); -} - -/* - * Returns true if a direct reclaim should wait on pages under writeback. - * - * If we are direct reclaiming for contiguous pages and we do not reclaim - * everything in the list, try again and wait for writeback IO to complete. - * This will stall high-order allocations noticeably. Only do that when really - * need to free the pages under high memory pressure. - */ -static inline bool should_reclaim_stall(unsigned long nr_taken, - unsigned long nr_freed, - int priority, - struct scan_control *sc) -{ - int lumpy_stall_priority; - - /* kswapd should not stall on sync IO */ - if (current_is_kswapd()) - return false; - - /* Only stall on lumpy reclaim */ - if (sc->reclaim_mode & RECLAIM_MODE_SINGLE) - return false; - - /* If we have reclaimed everything on the isolated list, no stall */ - if (nr_freed == nr_taken) - return false; - - /* - * For high-order allocations, there are two stall thresholds. - * High-cost allocations stall immediately where as lower - * order allocations such as stacks require the scanning - * priority to be much higher before stalling. - */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - lumpy_stall_priority = DEF_PRIORITY; - else - lumpy_stall_priority = DEF_PRIORITY / 3; - - return priority <= lumpy_stall_priority; -} - /* * shrink_inactive_list() is a helper for shrink_zone(). It returns the number * of reclaimed pages */ static noinline_for_stack unsigned long -shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority, int file) +shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru) { LIST_HEAD(page_list); unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; - unsigned long nr_anon; - unsigned long nr_file; unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; - isolate_mode_t isolate_mode = ISOLATE_INACTIVE; - struct zone *zone = mz->zone; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct zone *zone = lruvec_zone(lruvec); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1522,10 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, return SWAP_CLUSTER_MAX; } - set_reclaim_mode(priority, sc, false); - if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) - isolate_mode |= ISOLATE_ACTIVE; - lru_add_drain(); if (!sc->may_unmap) @@ -1535,38 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, - sc, isolate_mode, 0, file); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, + &nr_scanned, sc, isolate_mode, lru); + + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); + if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) - __count_zone_vm_events(PGSCAN_KSWAPD, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else - __count_zone_vm_events(PGSCAN_DIRECT, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } spin_unlock_irq(&zone->lru_lock); if (nr_taken == 0) return 0; - update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); - - nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, + nr_reclaimed = shrink_page_list(&page_list, zone, sc, &nr_dirty, &nr_writeback); - /* Check if we should syncronously wait for writeback */ - if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { - set_reclaim_mode(priority, sc, true); - nr_reclaimed += shrink_page_list(&page_list, mz, sc, - priority, &nr_dirty, &nr_writeback); - } - spin_lock_irq(&zone->lru_lock); - reclaim_stat->recent_scanned[0] += nr_anon; - reclaim_stat->recent_scanned[1] += nr_file; + reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) { if (current_is_kswapd()) @@ -1577,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, nr_reclaimed); } - putback_inactive_pages(mz, &page_list); + putback_inactive_pages(lruvec, &page_list); - __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1609,14 +1291,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any * isolated page is PageWriteback */ - if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) + if (nr_writeback && nr_writeback >= + (nr_taken >> (DEF_PRIORITY - sc->priority))) wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, - priority, - trace_shrink_flags(file, sc->reclaim_mode)); + sc->priority, + trace_shrink_flags(file)); return nr_reclaimed; } @@ -1638,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * But we had to alter page->flags anyway. */ -static void move_active_pages_to_lru(struct zone *zone, +static void move_active_pages_to_lru(struct lruvec *lruvec, struct list_head *list, struct list_head *pages_to_free, enum lru_list lru) { + struct zone *zone = lruvec_zone(lruvec); unsigned long pgmoved = 0; struct page *page; + int nr_pages; while (!list_empty(list)) { - struct lruvec *lruvec; - page = lru_to_page(list); + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - lruvec = mem_cgroup_lru_add_list(zone, page, lru); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); list_move(&page->lru, &lruvec->lists[lru]); - pgmoved += hpage_nr_pages(page); + pgmoved += nr_pages; if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1677,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone, } static void shrink_active_list(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, + struct lruvec *lruvec, struct scan_control *sc, - int priority, int file) + enum lru_list lru) { unsigned long nr_taken; unsigned long nr_scanned; @@ -1688,15 +1373,14 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_active); LIST_HEAD(l_inactive); struct page *page; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; unsigned long nr_rotated = 0; - isolate_mode_t isolate_mode = ISOLATE_ACTIVE; - struct zone *zone = mz->zone; + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct zone *zone = lruvec_zone(lruvec); lru_add_drain(); - reset_reclaim_mode(sc); - if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; if (!sc->may_writepage) @@ -1704,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, - isolate_mode, 1, file); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, + &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) zone->pages_scanned += nr_scanned; reclaim_stat->recent_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, nr_scanned); - if (file) - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); - else - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken); + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1737,7 +1418,8 @@ static void shrink_active_list(unsigned long nr_to_scan, } } - if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { + if (page_referenced(page, 0, sc->target_mem_cgroup, + &vm_flags)) { nr_rotated += hpage_nr_pages(page); /* * Identify referenced, file-backed active pages and @@ -1770,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(zone, &l_active, &l_hold, - LRU_ACTIVE + file * LRU_FILE); - move_active_pages_to_lru(zone, &l_inactive, &l_hold, - LRU_BASE + file * LRU_FILE); + move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); + move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1796,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone) /** * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * @sc: scan control of this context + * @lruvec: LRU vector to check * * Returns true if the zone does not have enough inactive anon pages, * meaning some active anon pages need to be deactivated. */ -static int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static int inactive_anon_is_low(struct lruvec *lruvec) { /* * If we don't have swap space, anonymous page deactivation @@ -1811,14 +1490,13 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz) if (!total_swap_pages) return 0; - if (!scanning_global_lru(mz)) - return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, - mz->zone); + if (!mem_cgroup_disabled()) + return mem_cgroup_inactive_anon_is_low(lruvec); - return inactive_anon_is_low_global(mz->zone); + return inactive_anon_is_low_global(lruvec_zone(lruvec)); } #else -static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static inline int inactive_anon_is_low(struct lruvec *lruvec) { return 0; } @@ -1836,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone) /** * inactive_file_is_low - check if file pages need to be deactivated - * @mz: memory cgroup and zone to check + * @lruvec: LRU vector to check * * When the system is doing streaming IO, memory pressure here * ensures that active file pages get deactivated, until more @@ -1848,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone) * This uses a different ratio than the anonymous pages, because * the page cache uses a use-once replacement algorithm. */ -static int inactive_file_is_low(struct mem_cgroup_zone *mz) +static int inactive_file_is_low(struct lruvec *lruvec) { - if (!scanning_global_lru(mz)) - return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, - mz->zone); + if (!mem_cgroup_disabled()) + return mem_cgroup_inactive_file_is_low(lruvec); - return inactive_file_is_low_global(mz->zone); + return inactive_file_is_low_global(lruvec_zone(lruvec)); } -static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) +static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru) { - if (file) - return inactive_file_is_low(mz); + if (is_file_lru(lru)) + return inactive_file_is_low(lruvec); else - return inactive_anon_is_low(mz); + return inactive_anon_is_low(lruvec); } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority) + struct lruvec *lruvec, struct scan_control *sc) { - int file = is_file_lru(lru); - if (is_active_lru(lru)) { - if (inactive_list_is_low(mz, file)) - shrink_active_list(nr_to_scan, mz, sc, priority, file); + if (inactive_list_is_low(lruvec, lru)) + shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } - return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); + return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); } -static int vmscan_swappiness(struct mem_cgroup_zone *mz, - struct scan_control *sc) +static int vmscan_swappiness(struct scan_control *sc) { if (global_reclaim(sc)) return vm_swappiness; - return mem_cgroup_swappiness(mz->mem_cgroup); + return mem_cgroup_swappiness(sc->target_mem_cgroup); } /* @@ -1896,17 +1569,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz, * * nr[0] = anon pages to scan; nr[1] = file pages to scan */ -static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, - unsigned long *nr, int priority) +static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, + unsigned long *nr) { unsigned long anon, file, free; unsigned long anon_prio, file_prio; unsigned long ap, fp; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2], denominator; enum lru_list lru; int noswap = 0; bool force_scan = false; + struct zone *zone = lruvec_zone(lruvec); /* * If the zone or memcg is small, nr[l] can be 0. This @@ -1918,7 +1592,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * latencies, so it's better to scan a minimum amount there as * well. */ - if (current_is_kswapd() && mz->zone->all_unreclaimable) + if (current_is_kswapd() && zone->all_unreclaimable) force_scan = true; if (!global_reclaim(sc)) force_scan = true; @@ -1932,16 +1606,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, goto out; } - anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + + get_lru_size(lruvec, LRU_INACTIVE_ANON); + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + + get_lru_size(lruvec, LRU_INACTIVE_FILE); if (global_reclaim(sc)) { - free = zone_page_state(mz->zone, NR_FREE_PAGES); + free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, force-scan anon pages. */ - if (unlikely(file + free <= high_wmark_pages(mz->zone))) { + if (unlikely(file + free <= high_wmark_pages(zone))) { fraction[0] = 1; fraction[1] = 0; denominator = 1; @@ -1953,8 +1627,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * With swappiness at 100, anonymous and file have the same priority. * This scanning priority is essentially the inverse of IO cost. */ - anon_prio = vmscan_swappiness(mz, sc); - file_prio = 200 - vmscan_swappiness(mz, sc); + anon_prio = vmscan_swappiness(sc); + file_prio = 200 - anon_prio; /* * OK, so we have swap space and a fair amount of page cache @@ -1967,7 +1641,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * * anon in [0], file in [1] */ - spin_lock_irq(&mz->zone->lru_lock); + spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; reclaim_stat->recent_rotated[0] /= 2; @@ -1983,12 +1657,12 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * proportional to the fraction of recently scanned pages on * each list that were recently referenced and in active use. */ - ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); + ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1); ap /= reclaim_stat->recent_rotated[0] + 1; - fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); + fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; - spin_unlock_irq(&mz->zone->lru_lock); + spin_unlock_irq(&zone->lru_lock); fraction[0] = ap; fraction[1] = fp; @@ -1998,9 +1672,9 @@ out: int file = is_file_lru(lru); unsigned long scan; - scan = zone_nr_lru_pages(mz, lru); - if (priority || noswap) { - scan >>= priority; + scan = get_lru_size(lruvec, lru); + if (sc->priority || noswap || !vmscan_swappiness(sc)) { + scan >>= sc->priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; scan = div64_u64(scan * fraction[file], denominator); @@ -2009,14 +1683,25 @@ out: } } +/* Use reclaim/compaction for costly allocs or under memory pressure */ +static bool in_reclaim_compaction(struct scan_control *sc) +{ + if (COMPACTION_BUILD && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + sc->priority < DEF_PRIORITY - 2)) + return true; + + return false; +} + /* - * Reclaim/compaction depends on a number of pages being freed. To avoid - * disruption to the system, a small number of order-0 pages continue to be - * rotated and reclaimed in the normal fashion. However, by the time we get - * back to the allocator and call try_to_compact_zone(), we ensure that - * there are enough free pages for it to be likely successful + * Reclaim/compaction is used for high-order allocation requests. It reclaims + * order-0 pages before compacting the zone. should_continue_reclaim() returns + * true if more pages should be reclaimed such that when the page allocator + * calls try_to_compact_zone() that it will have enough free pages to succeed. + * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, +static inline bool should_continue_reclaim(struct lruvec *lruvec, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) @@ -2025,7 +1710,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, unsigned long inactive_lru_pages; /* If not in reclaim/compaction mode, stop */ - if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) + if (!in_reclaim_compaction(sc)) return false; /* Consider stopping depending on scan and reclaim activity */ @@ -2056,15 +1741,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); + inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(mz->zone, sc->order)) { + switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) { case COMPACT_PARTIAL: case COMPACT_CONTINUE: return false; @@ -2076,8 +1761,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, - struct scan_control *sc) +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -2089,7 +1773,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; - get_scan_count(mz, sc, nr, priority); + get_scan_count(lruvec, sc, nr); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -2101,7 +1785,7 @@ restart: nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - mz, sc, priority); + lruvec, sc); } } /* @@ -2112,7 +1796,8 @@ restart: * with multiple processes reclaiming pages, the total * freeing target can get unreasonably large. */ - if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) + if (nr_reclaimed >= nr_to_reclaim && + sc->priority < DEF_PRIORITY) break; } blk_finish_plug(&plug); @@ -2122,35 +1807,33 @@ restart: * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_anon_is_low(mz)) - shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ - if (should_continue_reclaim(mz, nr_reclaimed, - sc->nr_scanned - nr_scanned, sc)) + if (should_continue_reclaim(lruvec, nr_reclaimed, + sc->nr_scanned - nr_scanned, sc)) goto restart; throttle_vm_writeout(sc->gfp_mask); } -static void shrink_zone(int priority, struct zone *zone, - struct scan_control *sc) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { .zone = zone, - .priority = priority, + .priority = sc->priority, }; struct mem_cgroup *memcg; memcg = mem_cgroup_iter(root, NULL, &reclaim); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + + shrink_lruvec(lruvec, sc); - shrink_mem_cgroup_zone(priority, &mz, sc); /* * Limit reclaim has historically picked one memcg and * scanned it with decreasing priority levels until @@ -2226,8 +1909,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) * the caller that it should consider retrying the allocation instead of * further reclaim. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, - struct scan_control *sc) +static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; @@ -2254,7 +1936,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, if (global_reclaim(sc)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc->priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ if (COMPACTION_BUILD) { /* @@ -2286,7 +1969,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, /* need some check for avoid more shrink_zone() */ } - shrink_zone(priority, zone, sc); + shrink_zone(zone, sc); } return aborted_reclaim; @@ -2337,7 +2020,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc, struct shrink_control *shrink) { - int priority; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -2350,11 +2032,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (global_reclaim(sc)) count_vm_event(ALLOCSTALL); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { sc->nr_scanned = 0; - if (!priority) - disable_swap_token(sc->target_mem_cgroup); - aborted_reclaim = shrink_zones(priority, zonelist, sc); + aborted_reclaim = shrink_zones(zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2396,7 +2076,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, /* Take a nap, wait for some writeback to complete */ if (!sc->hibernation_mode && sc->nr_scanned && - priority < DEF_PRIORITY - 2) { + sc->priority < DEF_PRIORITY - 2) { struct zone *preferred_zone; first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), @@ -2404,7 +2084,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, &preferred_zone); wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); } - } + } while (--sc->priority >= 0); out: delayacct_freepages_end(); @@ -2442,6 +2122,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_unmap = 1, .may_swap = 1, .order = order, + .priority = DEF_PRIORITY, .target_mem_cgroup = NULL, .nodemask = nodemask, }; @@ -2474,17 +2155,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !noswap, .order = 0, + .priority = 0, .target_mem_cgroup = memcg, }; - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); - trace_mm_vmscan_memcg_softlimit_reclaim_begin(0, + trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, sc.may_writepage, sc.gfp_mask); @@ -2495,7 +2174,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_mem_cgroup_zone(0, &mz, &sc); + shrink_lruvec(lruvec, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2516,6 +2195,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_swap = !noswap, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, + .priority = DEF_PRIORITY, .target_mem_cgroup = memcg, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | @@ -2546,8 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, } #endif -static void age_active_anon(struct zone *zone, struct scan_control *sc, - int priority) +static void age_active_anon(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -2556,14 +2235,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc, memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); - if (inactive_anon_is_low(&mz)) - shrink_active_list(SWAP_CLUSTER_MAX, &mz, - sc, priority, 0); + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); @@ -2672,7 +2348,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int all_zones_ok; unsigned long balanced; - int priority; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long total_scanned; @@ -2696,18 +2371,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, }; loop_again: total_scanned = 0; + sc.priority = DEF_PRIORITY; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; - /* The swap token gets in the way of swapout... */ - if (!priority) - disable_swap_token(NULL); - all_zones_ok = 1; balanced = 0; @@ -2721,14 +2393,15 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. */ - age_active_anon(zone, &sc, priority); + age_active_anon(zone, &sc); /* * If the number of buffer_heads in the machine @@ -2776,7 +2449,8 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; sc.nr_scanned = 0; @@ -2820,7 +2494,7 @@ loop_again: !zone_watermark_ok_safe(zone, testorder, high_wmark_pages(zone) + balance_gap, end_zone, 0)) { - shrink_zone(priority, zone, &sc); + shrink_zone(zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); @@ -2877,7 +2551,7 @@ loop_again: * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. */ - if (total_scanned && (priority < DEF_PRIORITY - 2)) { + if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else @@ -2892,7 +2566,7 @@ loop_again: */ if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; - } + } while (--sc.priority >= 0); out: /* @@ -2942,7 +2616,8 @@ out: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* Would compaction fail due to lack of free memory? */ @@ -3209,6 +2884,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .nr_to_reclaim = nr_to_reclaim, .hibernation_mode = 1, .order = 0, + .priority = DEF_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3386,7 +3062,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) const unsigned long nr_pages = 1 << order; struct task_struct *p = current; struct reclaim_state reclaim_state; - int priority; struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), @@ -3395,6 +3070,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .order = order, + .priority = ZONE_RECLAIM_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3417,11 +3093,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * Free memory by calling shrink zone with increasing * priorities until we have enough memory freed. */ - priority = ZONE_RECLAIM_PRIORITY; do { - shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && sc.nr_reclaimed < nr_pages); + shrink_zone(zone, &sc); + } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); @@ -3536,7 +3210,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) if (mapping_unevictable(page_mapping(page))) return 0; - if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) + if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page))) return 0; return 1; @@ -3572,6 +3246,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + lruvec = mem_cgroup_page_lruvec(page, zone); if (!PageLRU(page) || !PageUnevictable(page)) continue; @@ -3581,11 +3256,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) VM_BUG_ON(PageActive(page)); ClearPageUnevictable(page); - __dec_zone_state(zone, NR_UNEVICTABLE); - lruvec = mem_cgroup_lru_move_lists(zone, page, - LRU_UNEVICTABLE, lru); - list_move(&page->lru, &lruvec->lists[lru]); - __inc_zone_state(zone, NR_INACTIVE_ANON + lru); + del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); + add_page_to_lru_list(page, lruvec, lru); pgrescued++; } } diff --git a/mm/vmstat.c b/mm/vmstat.c index 0dad31dc1618..1bbbbd9776ad 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1223,7 +1223,6 @@ module_init(setup_vmstat) #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) #include <linux/debugfs.h> -static struct dentry *extfrag_debug_root; /* * Return an index indicating how much of the available free memory is @@ -1361,19 +1360,24 @@ static const struct file_operations extfrag_file_ops = { static int __init extfrag_debug_init(void) { + struct dentry *extfrag_debug_root; + extfrag_debug_root = debugfs_create_dir("extfrag", NULL); if (!extfrag_debug_root) return -ENOMEM; if (!debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL, &unusable_file_ops)) - return -ENOMEM; + goto fail; if (!debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL, &extfrag_file_ops)) - return -ENOMEM; + goto fail; return 0; +fail: + debugfs_remove_recursive(extfrag_debug_root); + return -ENOMEM; } module_init(extfrag_debug_init); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 151703791bb0..b6f3583ddfe8 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -74,9 +74,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) percpu_counter_destroy(&tcp->tcp_sockets_allocated); val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); - - if (val != RESOURCE_MAX) - static_key_slow_dec(&memcg_socket_limit_enabled); } EXPORT_SYMBOL(tcp_destroy_cgroup); @@ -107,10 +104,33 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, net->ipv4.sysctl_tcp_mem[i]); - if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) - static_key_slow_dec(&memcg_socket_limit_enabled); - else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) - static_key_slow_inc(&memcg_socket_limit_enabled); + if (val == RESOURCE_MAX) + clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); + else if (val != RESOURCE_MAX) { + /* + * The active bit needs to be written after the static_key + * update. This is what guarantees that the socket activation + * function is the last one to run. See sock_update_memcg() for + * details, and note that we don't mark any socket as belonging + * to this memcg until that flag is up. + * + * We need to do this, because static_keys will span multiple + * sites, but we can't control their order. If we mark a socket + * as accounted, but the accounting functions are not patched in + * yet, we'll lose accounting. + * + * We never race with the readers in sock_update_memcg(), + * because when this value change, the code to process it is not + * patched in yet. + * + * The activated bit is used to guarantee that no two writers + * will do the update in the same memcg. Without that, we can't + * properly shutdown the static key. + */ + if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) + static_key_slow_inc(&memcg_socket_limit_enabled); + set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); + } return 0; } diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 7dab7b25b5c6..f576971f6556 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -35,6 +35,7 @@ #include <sys/mount.h> #include <sys/statfs.h> #include "../../include/linux/magic.h" +#include "../../include/linux/kernel-page-flags.h" #ifndef MAX_PATH @@ -73,33 +74,6 @@ #define KPF_BYTES 8 #define PROC_KPAGEFLAGS "/proc/kpageflags" -/* copied from kpageflags_read() */ -#define KPF_LOCKED 0 -#define KPF_ERROR 1 -#define KPF_REFERENCED 2 -#define KPF_UPTODATE 3 -#define KPF_DIRTY 4 -#define KPF_LRU 5 -#define KPF_ACTIVE 6 -#define KPF_SLAB 7 -#define KPF_WRITEBACK 8 -#define KPF_RECLAIM 9 -#define KPF_BUDDY 10 - -/* [11-20] new additions in 2.6.31 */ -#define KPF_MMAP 11 -#define KPF_ANON 12 -#define KPF_SWAPCACHE 13 -#define KPF_SWAPBACKED 14 -#define KPF_COMPOUND_HEAD 15 -#define KPF_COMPOUND_TAIL 16 -#define KPF_HUGE 17 -#define KPF_UNEVICTABLE 18 -#define KPF_HWPOISON 19 -#define KPF_NOPAGE 20 -#define KPF_KSM 21 -#define KPF_THP 22 - /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 #define KPF_MLOCKED 33 @@ -326,7 +300,7 @@ static char *page_flag_name(uint64_t flags) { static char buf[65]; int present; - int i, j; + size_t i, j; for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { present = (flags >> i) & 1; @@ -344,7 +318,7 @@ static char *page_flag_name(uint64_t flags) static char *page_flag_longname(uint64_t flags) { static char buf[1024]; - int i, n; + size_t i, n; for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { if (!page_flag_names[i]) @@ -402,7 +376,7 @@ static void show_page(unsigned long voffset, static void show_summary(void) { - int i; + size_t i; printf(" flags\tpage-count MB" " symbolic-flags\t\t\tlong-symbolic-flags\n"); @@ -500,7 +474,7 @@ static int debugfs_valid_mountpoint(const char *debugfs) /* find the path to the mounted debugfs */ static const char *debugfs_find_mountpoint(void) { - const char **ptr; + const char *const *ptr; char type[100]; FILE *fp; @@ -537,7 +511,7 @@ static const char *debugfs_find_mountpoint(void) static void debugfs_mount(void) { - const char **ptr; + const char *const *ptr; /* see if it's already mounted */ if (debugfs_find_mountpoint()) @@ -614,10 +588,10 @@ static int unpoison_page(unsigned long offset) * page frame walker */ -static int hash_slot(uint64_t flags) +static size_t hash_slot(uint64_t flags) { - int k = HASH_KEY(flags); - int i; + size_t k = HASH_KEY(flags); + size_t i; /* Explicitly reserve slot 0 for flags 0: the following logic * cannot distinguish an unoccupied slot from slot (flags==0). @@ -670,7 +644,7 @@ static void walk_pfn(unsigned long voffset, { uint64_t buf[KPAGEFLAGS_BATCH]; unsigned long batch; - long pages; + unsigned long pages; unsigned long i; while (count) { @@ -779,7 +753,7 @@ static const char *page_flag_type(uint64_t flag) static void usage(void) { - int i, j; + size_t i, j; printf( "page-types [options]\n" @@ -938,7 +912,7 @@ static void add_bits_filter(uint64_t mask, uint64_t bits) static uint64_t parse_flag_name(const char *str, int len) { - int i; + size_t i; if (!*str || !len) return 0; |