From 466b3ddfbcf4f5ce402a77397630a0fa9ea9ce6b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Sun, 30 Oct 2011 16:35:08 +0100
Subject: PCI: Fix compile errors with PCI_ATS and !PCI_IOV

The ats and sroiv members of 'struct pci_dev' are required
for the ATS code already, even without IOV support compiled
in. So depend on ATS here. This is fine with PCI_IOV too
because it selects PCI_ATS. Also the prototypes for ATS
need to be available for PCI_ATS.

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-ats.h | 6 +++---
 include/linux/pci.h     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index e3d0b3890249..7ef68724f0f0 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -12,7 +12,7 @@ struct pci_ats {
 	unsigned int is_enabled:1;      /* Enable bit is set */
 };
 
-#ifdef CONFIG_PCI_IOV
+#ifdef CONFIG_PCI_ATS
 
 extern int pci_enable_ats(struct pci_dev *dev, int ps);
 extern void pci_disable_ats(struct pci_dev *dev);
@@ -29,7 +29,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev)
 	return dev->ats && dev->ats->is_enabled;
 }
 
-#else /* CONFIG_PCI_IOV */
+#else /* CONFIG_PCI_ATS */
 
 static inline int pci_enable_ats(struct pci_dev *dev, int ps)
 {
@@ -50,7 +50,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev)
 	return 0;
 }
 
-#endif /* CONFIG_PCI_IOV */
+#endif /* CONFIG_PCI_ATS */
 
 #ifdef CONFIG_PCI_PRI
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 337df0d5d5f7..7cda65b5f798 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -338,7 +338,7 @@ struct pci_dev {
 	struct list_head msi_list;
 #endif
 	struct pci_vpd *vpd;
-#ifdef CONFIG_PCI_IOV
+#ifdef CONFIG_PCI_ATS
 	union {
 		struct pci_sriov *sriov;	/* SR-IOV capability related */
 		struct pci_dev *physfn;	/* the PF this VF is associated with */
-- 
cgit v1.2.3


From 1788ea6e3b2a58cf4fb00206e362d9caff8d86a7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 4 Nov 2011 13:31:21 -0400
Subject: nfs: when attempting to open a directory, fall back on normal lookup
 (try #5)

commit d953126 changed how nfs_atomic_lookup handles an -EISDIR return
from an OPEN call. Prior to that patch, that caused the client to fall
back to doing a normal lookup. When that patch went in, the code began
returning that error to userspace. The d_revalidate codepath however
never had the corresponding change, so it was still possible to end up
with a NULL ctx->state pointer after that.

That patch caused a regression. When we attempt to open a directory that
does not have a cached dentry, that open now errors out with EISDIR. If
you attempt the same open with a cached dentry, it will succeed.

Fix this by reverting the change in nfs_atomic_lookup and allowing
attempts to open directories to fall back to a normal lookup

Also, add a NFSv4-specific f_ops->open routine that just returns
-ENOTDIR. This should never be called if things are working properly,
but if it ever is, then the dprintk may help in debugging.

To facilitate this, a new file_operations field is also added to the
nfs_rpc_ops struct.

Cc: stable@kernel.org
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            |  2 +-
 fs/nfs/file.c           | 32 ++++++++++++++++++++++++++++++++
 fs/nfs/inode.c          |  2 +-
 fs/nfs/nfs3proc.c       |  1 +
 fs/nfs/nfs4proc.c       |  1 +
 fs/nfs/proc.c           |  1 +
 include/linux/nfs_fs.h  |  3 +++
 include/linux/nfs_xdr.h |  1 +
 8 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b238d95ac48c..ac2899098147 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1468,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				res = ERR_CAST(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0a1f8312b4dc..6d93e0754b5e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -886,3 +886,35 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
 			file->f_path.dentry->d_name.name, arg);
 	return -EINVAL;
 }
+
+#ifdef CONFIG_NFS_V4
+static int
+nfs4_file_open(struct inode *inode, struct file *filp)
+{
+	/*
+	 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
+	 * this point, then something is very wrong
+	 */
+	dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
+	return -ENOTDIR;
+}
+
+const struct file_operations nfs4_file_operations = {
+	.llseek		= nfs_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= nfs_file_read,
+	.aio_write	= nfs_file_write,
+	.mmap		= nfs_file_mmap,
+	.open		= nfs4_file_open,
+	.flush		= nfs_file_flush,
+	.release	= nfs_file_release,
+	.fsync		= nfs_file_fsync,
+	.lock		= nfs_lock,
+	.flock		= nfs_flock,
+	.splice_read	= nfs_file_splice_read,
+	.splice_write	= nfs_file_splice_write,
+	.check_flags	= nfs_check_flags,
+	.setlease	= nfs_setlease,
+};
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c07a55aec838..50a15fa8cf98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		 */
 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
-			inode->i_fop = &nfs_file_operations;
+			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 85f1690ca08c..d4bc9ed91748 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
 	.file_inode_ops	= &nfs3_file_inode_operations,
+	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b60fddf606f7..069cb8094d43 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6253,6 +6253,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
 	.file_inode_ops	= &nfs4_file_inode_operations,
+	.file_ops	= &nfs4_file_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b8535d7e..f48125da198a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
 	.file_inode_ops	= &nfs_file_inode_operations,
+	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ab2c6343361a..92ecf5585fac 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -410,6 +410,9 @@ extern const struct inode_operations nfs_file_inode_operations;
 extern const struct inode_operations nfs3_file_inode_operations;
 #endif /* CONFIG_NFS_V3 */
 extern const struct file_operations nfs_file_operations;
+#ifdef CONFIG_NFS_V4
+extern const struct file_operations nfs4_file_operations;
+#endif /* CONFIG_NFS_V4 */
 extern const struct address_space_operations nfs_file_aops;
 extern const struct address_space_operations nfs_dir_aops;
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c74595ba7094..2a7c533be5dd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1192,6 +1192,7 @@ struct nfs_rpc_ops {
 	const struct dentry_operations *dentry_ops;
 	const struct inode_operations *dir_inode_ops;
 	const struct inode_operations *file_inode_ops;
+	const struct file_operations *file_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
-- 
cgit v1.2.3


From d65670a78cdbfae94f20a9e05ec705871d7cdf2b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 31 Oct 2011 17:06:35 -0400
Subject: clocksource: Avoid selecting mult values that might overflow when
 adjusted

For some frequencies, the clocks_calc_mult_shift() function will
unfortunately select mult values very close to 0xffffffff.  This
has the potential to overflow when NTP adjusts the clock, adding
to the mult value.

This patch adds a clocksource.maxadj value, which provides
an approximation of an 11% adjustment(NTP limits adjustments to
500ppm and the tick adjustment is limited to 10%), which could
be made to the clocksource.mult value. This is then used to both
check that the current mult value won't overflow/underflow, as
well as warning us if the timekeeping_adjust() code pushes over
that 11% boundary.

v2: Fix max_adjustment calculation, and improve WARN_ONCE
messages.

v3: Don't warn before maxadj has actually been set

CC: Yong Zhang <yong.zhang0@gmail.com>
CC: David Daney <ddaney.cavm@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Chen Jie <chenj@lemote.com>
CC: zhangfx <zhangfx@lemote.com>
CC: stable@kernel.org
Reported-by: Chen Jie <chenj@lemote.com>
Reported-by: zhangfx <zhangfx@lemote.com>
Tested-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h |  3 ++-
 kernel/time/clocksource.c   | 58 +++++++++++++++++++++++++++++++++++++--------
 kernel/time/timekeeping.c   |  7 ++++++
 3 files changed, 57 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 139c4db55f17..c86c940d1de3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -156,6 +156,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
+ * @maxadj		maximum adjustment value to mult (~11%)
  * @flags:		flags describing special properties
  * @archdata:		arch-specific data
  * @suspend:		suspend function for the clocksource, if necessary
@@ -172,7 +173,7 @@ struct clocksource {
 	u32 mult;
 	u32 shift;
 	u64 max_idle_ns;
-
+	u32 maxadj;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cf52fda2e096..cfc65e1eb9fb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -491,6 +491,22 @@ void clocksource_touch_watchdog(void)
 	clocksource_resume_watchdog();
 }
 
+/**
+ * clocksource_max_adjustment- Returns max adjustment amount
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u32 clocksource_max_adjustment(struct clocksource *cs)
+{
+	u64 ret;
+	/*
+	 * We won't try to correct for more then 11% adjustments (110,000 ppm),
+	 */
+	ret = (u64)cs->mult * 11;
+	do_div(ret,100);
+	return (u32)ret;
+}
+
 /**
  * clocksource_max_deferment - Returns max time the clocksource can be deferred
  * @cs:         Pointer to clocksource
@@ -503,25 +519,28 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
 	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
-	 * is equivalent to the below.
-	 * max_cycles < (2^63)/cs->mult
-	 * max_cycles < 2^(log2((2^63)/cs->mult))
-	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
-	 * max_cycles < 2^(63 - log2(cs->mult))
-	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
+	 * which is equivalent to the below.
+	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
+	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
+	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
+	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
 	 * Please note that we add 1 to the result of the log2 to account for
 	 * any rounding errors, ensure the above inequality is satisfied and
 	 * no overflow will occur.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
 	 * determined by the minimum of max_cycles and cs->mask.
+	 * Note: Here we subtract the maxadj to make sure we don't sleep for
+	 * too long if there's a large negative adjustment.
 	 */
 	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
+					cs->shift);
 
 	/*
 	 * To ensure that the clocksource does not wrap whilst we are idle,
@@ -640,7 +659,6 @@ static void clocksource_enqueue(struct clocksource *cs)
 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
-
 	/*
 	 * Calc the maximum number of seconds which we can run before
 	 * wrapping around. For clocksources which have a mask > 32bit
@@ -661,6 +679,20 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 
 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
 			       NSEC_PER_SEC / scale, sec * scale);
+
+	/*
+	 * for clocksources that have large mults, to avoid overflow.
+	 * Since mult may be adjusted by ntp, add an safety extra margin
+	 *
+	 */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	while ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult)) {
+		cs->mult >>= 1;
+		cs->shift--;
+		cs->maxadj = clocksource_max_adjustment(cs);
+	}
+
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
@@ -701,6 +733,12 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max adjustment for given mult/shift */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
 	/* calculate max idle time permitted for this clocksource */
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2b021b0e8507..e65ff3171102 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -820,6 +820,13 @@ static void timekeeping_adjust(s64 offset)
 	} else
 		return;
 
+	WARN_ONCE(timekeeper.clock->maxadj &&
+			(timekeeper.mult + adj > timekeeper.clock->mult +
+						timekeeper.clock->maxadj),
+			"Adjusting %s more then 11%% (%ld vs %ld)\n",
+			timekeeper.clock->name, (long)timekeeper.mult + adj,
+			(long)timekeeper.clock->mult +
+				timekeeper.clock->maxadj);
 	timekeeper.mult += adj;
 	timekeeper.xtime_interval += interval;
 	timekeeper.xtime_nsec -= offset;
-- 
cgit v1.2.3


From 224736d9113ab4a7cf3f05c05377492bd99b4b02 Mon Sep 17 00:00:00 2001
From: Stratos Psomadakis <psomas@grnet.gr>
Date: Thu, 10 Nov 2011 15:45:37 +0200
Subject: libceph: Allocate larger oid buffer in request msgs

ceph_osd_request struct allocates a 40-byte buffer for object names.
RBD image names can be up to 96 chars long (100 with the .rbd suffix),
which results in the object name for the image being truncated, and a
subsequent map failure.

Increase the oid buffer in request messages, in order to avoid the
truncation.

Signed-off-by: Stratos Psomadakis <psomas@grnet.gr>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 include/linux/ceph/osd_client.h | 8 +++++++-
 net/ceph/osd_client.c           | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f88eacb111d4..7c05ac202d90 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -10,6 +10,12 @@
 #include "osdmap.h"
 #include "messenger.h"
 
+/* 
+ * Maximum object name size 
+ * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100) 
+ */
+#define MAX_OBJ_NAME_SIZE 100
+
 struct ceph_msg;
 struct ceph_snap_context;
 struct ceph_osd_request;
@@ -75,7 +81,7 @@ struct ceph_osd_request {
 	struct inode *r_inode;         	      /* for use by callbacks */
 	void *r_priv;			      /* ditto */
 
-	char              r_oid[40];          /* object name */
+	char              r_oid[MAX_OBJ_NAME_SIZE];          /* object name */
 	int               r_oid_len;
 	unsigned long     r_stamp;            /* send OR check time */
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 733e46008b89..f4f3f58f5234 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -244,7 +244,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 		ceph_pagelist_init(req->r_trail);
 	}
 	/* create request message; allow space for oid */
-	msg_size += 40;
+	msg_size += MAX_OBJ_NAME_SIZE;
 	if (snapc)
 		msg_size += sizeof(u64) * snapc->num_snaps;
 	if (use_mempool)
-- 
cgit v1.2.3


From 7b08fae8fbf0c14f003be8e039ed37bcbae4415a Mon Sep 17 00:00:00 2001
From: Marcos Paulo de Souza <marcos.mage@gmail.com>
Date: Tue, 1 Nov 2011 11:15:40 -0700
Subject: device.h: Fix struct member documentation

Fix warning of make xmldocs of documention of the struct member iommu_ops from struct bus_type.

Signed-off-by: Marcos Paulo de Souza <marcos.mage@gmail.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index ffbcf95cd97d..2b8832060893 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -69,7 +69,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @resume:	Called to bring a device on this bus out of sleep mode.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
- * @iommu_ops   IOMMU specific operations for this bus, used to attach IOMMU
+ * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
  *              driver implementations to a bus and allow the driver to do
  *              bus-specific setup
  * @p:		The private data of the driver core, only the driver core can
-- 
cgit v1.2.3


From 93f3350c46fa5dfcc9650eb19b186e71ffc924c3 Mon Sep 17 00:00:00 2001
From: Claudio Scordino <claudio@evidence.eu.com>
Date: Wed, 9 Nov 2011 15:51:49 +0100
Subject: RS485: fix inconsistencies in the meaning of some variables

The crisv10.c and the atmel_serial.c serial drivers intepret the fields of the
serial_rs485 structure in a different way.

In particular, crisv10.c uses SER_RS485_RTS_AFTER_SEND and
SER_RS485_RTS_ON_SEND for the voltage of the RTS pin; atmel_serial.c,
instead, uses these values to know if a delay must be set before and
after sending.  This patch makes the usage of these variables consistent
across all drivers and fixes the Documentation as well.

From now on, SER_RS485_RTS_AFTER_SEND and SER_RS485_RTS_ON_SEND will be
used to set the voltage of the RTS pin (as in the crisv10.c driver); the
delay will be understood by looking only at the value of
delay_rts_before_send and delay_rts_after_send.

Signed-off-by: Claudio Scordino <claudio@evidence.eu.com>
Signed-off-by: Darron Black <darron@griffin.net>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/serial/serial-rs485.txt | 14 +++++++++++---
 drivers/tty/serial/atmel_serial.c     | 16 +++-------------
 drivers/tty/serial/crisv10.c          | 10 ++--------
 include/linux/serial.h                | 14 ++++++++------
 4 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/serial/serial-rs485.txt b/Documentation/serial/serial-rs485.txt
index 079cb3df62cf..41c8378c0b2f 100644
--- a/Documentation/serial/serial-rs485.txt
+++ b/Documentation/serial/serial-rs485.txt
@@ -97,15 +97,23 @@
 
 	struct serial_rs485 rs485conf;
 
-	/* Set RS485 mode: */
+	/* Enable RS485 mode: */
 	rs485conf.flags |= SER_RS485_ENABLED;
 
+	/* Set logical level for RTS pin equal to 1 when sending: */
+	rs485conf.flags |= SER_RS485_RTS_ON_SEND;
+	/* or, set logical level for RTS pin equal to 0 when sending: */
+	rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND);
+
+	/* Set logical level for RTS pin equal to 1 after sending: */
+	rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
+	/* or, set logical level for RTS pin equal to 0 after sending: */
+	rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND);
+
 	/* Set rts delay before send, if needed: */
-	rs485conf.flags |= SER_RS485_RTS_BEFORE_SEND;
 	rs485conf.delay_rts_before_send = ...;
 
 	/* Set rts delay after send, if needed: */
-	rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
 	rs485conf.delay_rts_after_send = ...;
 
 	/* Set this flag if you want to receive data even whilst sending data */
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 4a0f86fa1e90..4c823f341d98 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -228,7 +228,7 @@ void atmel_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
 	if (rs485conf->flags & SER_RS485_ENABLED) {
 		dev_dbg(port->dev, "Setting UART to RS485\n");
 		atmel_port->tx_done_mask = ATMEL_US_TXEMPTY;
-		if (rs485conf->flags & SER_RS485_RTS_AFTER_SEND)
+		if ((rs485conf->delay_rts_after_send) > 0)
 			UART_PUT_TTGR(port, rs485conf->delay_rts_after_send);
 		mode |= ATMEL_US_USMODE_RS485;
 	} else {
@@ -304,7 +304,7 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl)
 
 	if (atmel_port->rs485.flags & SER_RS485_ENABLED) {
 		dev_dbg(port->dev, "Setting UART to RS485\n");
-		if (atmel_port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
+		if ((atmel_port->rs485.delay_rts_after_send) > 0)
 			UART_PUT_TTGR(port,
 					atmel_port->rs485.delay_rts_after_send);
 		mode |= ATMEL_US_USMODE_RS485;
@@ -1228,7 +1228,7 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	if (atmel_port->rs485.flags & SER_RS485_ENABLED) {
 		dev_dbg(port->dev, "Setting UART to RS485\n");
-		if (atmel_port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
+		if ((atmel_port->rs485.delay_rts_after_send) > 0)
 			UART_PUT_TTGR(port,
 					atmel_port->rs485.delay_rts_after_send);
 		mode |= ATMEL_US_USMODE_RS485;
@@ -1447,16 +1447,6 @@ static void __devinit atmel_of_init_port(struct atmel_uart_port *atmel_port,
 		rs485conf->delay_rts_after_send = rs485_delay[1];
 		rs485conf->flags = 0;
 
-		if (rs485conf->delay_rts_before_send == 0 &&
-		    rs485conf->delay_rts_after_send == 0) {
-			rs485conf->flags |= SER_RS485_RTS_ON_SEND;
-		} else {
-			if (rs485conf->delay_rts_before_send)
-				rs485conf->flags |= SER_RS485_RTS_BEFORE_SEND;
-			if (rs485conf->delay_rts_after_send)
-				rs485conf->flags |= SER_RS485_RTS_AFTER_SEND;
-		}
-
 		if (of_get_property(np, "rs485-rx-during-tx", NULL))
 			rs485conf->flags |= SER_RS485_RX_DURING_TX;
 
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index b7435043f2fe..1dfba7b779c8 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -3234,9 +3234,8 @@ rs_write(struct tty_struct *tty,
 		e100_disable_rx(info);
 		e100_enable_rx_irq(info);
 #endif
-		if ((info->rs485.flags & SER_RS485_RTS_BEFORE_SEND) &&
-			(info->rs485.delay_rts_before_send > 0))
-				msleep(info->rs485.delay_rts_before_send);
+		if (info->rs485.delay_rts_before_send > 0)
+			msleep(info->rs485.delay_rts_before_send);
 	}
 #endif /* CONFIG_ETRAX_RS485 */
 
@@ -3693,10 +3692,6 @@ rs_ioctl(struct tty_struct *tty,
 
 		rs485data.delay_rts_before_send = rs485ctrl.delay_rts_before_send;
 		rs485data.flags = 0;
-		if (rs485data.delay_rts_before_send != 0)
-			rs485data.flags |= SER_RS485_RTS_BEFORE_SEND;
-		else
-			rs485data.flags &= ~(SER_RS485_RTS_BEFORE_SEND);
 
 		if (rs485ctrl.enabled)
 			rs485data.flags |= SER_RS485_ENABLED;
@@ -4531,7 +4526,6 @@ static int __init rs_init(void)
 		/* Set sane defaults */
 		info->rs485.flags &= ~(SER_RS485_RTS_ON_SEND);
 		info->rs485.flags |= SER_RS485_RTS_AFTER_SEND;
-		info->rs485.flags &= ~(SER_RS485_RTS_BEFORE_SEND);
 		info->rs485.delay_rts_before_send = 0;
 		info->rs485.flags &= ~(SER_RS485_ENABLED);
 #endif
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 97ff8e27a6cc..3d86517fe7d5 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -207,13 +207,15 @@ struct serial_icounter_struct {
 
 struct serial_rs485 {
 	__u32	flags;			/* RS485 feature flags */
-#define SER_RS485_ENABLED		(1 << 0)
-#define SER_RS485_RTS_ON_SEND		(1 << 1)
-#define SER_RS485_RTS_AFTER_SEND	(1 << 2)
-#define SER_RS485_RTS_BEFORE_SEND	(1 << 3)
+#define SER_RS485_ENABLED		(1 << 0)	/* If enabled */
+#define SER_RS485_RTS_ON_SEND		(1 << 1)	/* Logical level for
+							   RTS pin when
+							   sending */
+#define SER_RS485_RTS_AFTER_SEND	(1 << 2)	/* Logical level for
+							   RTS pin after sent*/
 #define SER_RS485_RX_DURING_TX		(1 << 4)
-	__u32	delay_rts_before_send;	/* Milliseconds */
-	__u32	delay_rts_after_send;	/* Milliseconds */
+	__u32	delay_rts_before_send;	/* Delay before send (milliseconds) */
+	__u32	delay_rts_after_send;	/* Delay after send (milliseconds) */
 	__u32	padding[5];		/* Memory is cheap, new structs
 					   are a royal PITA .. */
 };
-- 
cgit v1.2.3


From 468e6a20afaccb67e2a7d7f60d301f90e1c6f301 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 7 Sep 2011 10:41:32 -0600
Subject: writeback: remove vm_dirties and task->dirties

They are not used any more.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/init_task.h | 1 -
 include/linux/sched.h     | 1 -
 kernel/fork.c             | 5 -----
 mm/page-writeback.c       | 9 ---------
 4 files changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 08ffab01e76c..94b1e356c02a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -184,7 +184,6 @@ extern struct cred init_cred;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
-	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f27e2c..1c4f3e9b9bc5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1521,7 +1521,6 @@ struct task_struct {
 #ifdef CONFIG_FAULT_INJECTION
 	int make_it_fail;
 #endif
-	struct prop_local_single dirties;
 	/*
 	 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
 	 * balance_dirty_pages() for some dirty throttling pause
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d17261329..da4a6a10d088 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -162,7 +162,6 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 
 void free_task(struct task_struct *tsk)
 {
-	prop_local_destroy_single(&tsk->dirties);
 	account_kernel_stack(tsk->stack, -1);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
@@ -274,10 +273,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	tsk->stack = ti;
 
-	err = prop_local_init_single(&tsk->dirties);
-	if (err)
-		goto out;
-
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e7cb5ff6e53d..71252486bc6f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
  *
  */
 static struct prop_descriptor vm_completions;
-static struct prop_descriptor vm_dirties;
 
 /*
  * couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
 {
 	int shift = calc_period_shift();
 	prop_change_shift(&vm_completions, shift);
-	prop_change_shift(&vm_dirties, shift);
 
 	writeback_set_ratelimit();
 }
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 
-void task_dirty_inc(struct task_struct *tsk)
-{
-	prop_inc_single(&vm_dirties, &tsk->dirties);
-}
-
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
@@ -1395,7 +1388,6 @@ void __init page_writeback_init(void)
 
 	shift = calc_period_shift();
 	prop_descriptor_init(&vm_completions, shift);
-	prop_descriptor_init(&vm_dirties, shift);
 }
 
 /**
@@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_zone_page_state(page, NR_DIRTIED);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
-		task_dirty_inc(current);
 		task_io_account_write(PAGE_CACHE_SIZE);
 	}
 }
-- 
cgit v1.2.3


From f6f8285132907757ef84ef8dae0a1244b8cde6ac Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2011 12:58:07 -0800
Subject: pstore: pass allocated memory region back to caller

The buf_lock cannot be held while populating the inodes, so make the backend
pass forward an allocated and filled buffer instead. This solves the following
backtrace. The effect is that "buf" is only ever used to notify the backends
that something was written to it, and shouldn't be used in the read path.

To replace the buf_lock during the read path, isolate the open/read/close
loop with a separate mutex to maintain serialized access to the backend.

Note that is is up to the pstore backend to cope if the (*write)() path is
called in the middle of the read path.

[   59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847
[   59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount
[   59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1
[   59.691019] Call Trace:
[   59.691019]  [<810252d5>] __might_sleep+0xc3/0xca
[   59.691019]  [<810a26e6>] kmem_cache_alloc+0x32/0xf3
[   59.691019]  [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4
[   59.691019]  [<810b68b1>] alloc_inode+0x2a/0x64
[   59.691019]  [<810b6903>] new_inode+0x18/0x43
[   59.691019]  [<81142447>] pstore_get_inode.isra.1+0x11/0x98
[   59.691019]  [<81142623>] pstore_mkfile+0xae/0x26f
[   59.691019]  [<810a2a66>] ? kmem_cache_free+0x19/0xb1
[   59.691019]  [<8116c821>] ? ida_get_new_above+0x140/0x158
[   59.691019]  [<811708ea>] ? __init_rwsem+0x1e/0x2c
[   59.691019]  [<810b67e8>] ? inode_init_always+0x111/0x1b0
[   59.691019]  [<8102127e>] ? should_resched+0xd/0x27
[   59.691019]  [<8137977f>] ? _cond_resched+0xd/0x21
[   59.691019]  [<81142abf>] pstore_get_records+0x52/0xa7
[   59.691019]  [<8114254b>] pstore_fill_super+0x7d/0x91
[   59.691019]  [<810a7ff5>] mount_single+0x46/0x82
[   59.691019]  [<8114231a>] pstore_mount+0x15/0x17
[   59.691019]  [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98
[   59.691019]  [<810a8199>] mount_fs+0x5a/0x12d
[   59.691019]  [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a
[   59.691019]  [<810b9474>] vfs_kern_mount+0x4f/0x7d
[   59.691019]  [<810b9d7e>] do_kern_mount+0x34/0xb2
[   59.691019]  [<810bb15f>] do_mount+0x5fc/0x64a
[   59.691019]  [<810912fb>] ? strndup_user+0x2e/0x3f
[   59.691019]  [<810bb3cb>] sys_mount+0x66/0x99
[   59.691019]  [<8137b537>] sysenter_do_call+0x12/0x26

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   | 31 ++++++++++++++++++++++---------
 drivers/firmware/efivars.c |  9 +++++++--
 fs/pstore/platform.c       | 13 ++++++++-----
 include/linux/pstore.h     |  4 +++-
 4 files changed, 40 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 127408069ca7..631b9477b99c 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -932,7 +932,8 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
 static int erst_open_pstore(struct pstore_info *psi);
 static int erst_close_pstore(struct pstore_info *psi);
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-			   struct timespec *time, struct pstore_info *psi);
+			   struct timespec *time, char **buf,
+			   struct pstore_info *psi);
 static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi);
 static int erst_clearer(enum pstore_type_id type, u64 id,
@@ -986,17 +987,23 @@ static int erst_close_pstore(struct pstore_info *psi)
 }
 
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-			   struct timespec *time, struct pstore_info *psi)
+			   struct timespec *time, char **buf,
+			   struct pstore_info *psi)
 {
 	int rc;
 	ssize_t len = 0;
 	u64 record_id;
-	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
-					(erst_info.buf - sizeof(*rcd));
+	struct cper_pstore_record *rcd;
+	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
 
 	if (erst_disable)
 		return -ENODEV;
 
+	rcd = kmalloc(rcd_len, GFP_KERNEL);
+	if (!rcd) {
+		rc = -ENOMEM;
+		goto out;
+	}
 skip:
 	rc = erst_get_record_id_next(&reader_pos, &record_id);
 	if (rc)
@@ -1004,22 +1011,27 @@ skip:
 
 	/* no more record */
 	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
-		rc = -1;
+		rc = -EINVAL;
 		goto out;
 	}
 
-	len = erst_read(record_id, &rcd->hdr, sizeof(*rcd) +
-			erst_info.bufsize);
+	len = erst_read(record_id, &rcd->hdr, rcd_len);
 	/* The record may be cleared by others, try read next record */
 	if (len == -ENOENT)
 		goto skip;
-	else if (len < 0) {
-		rc = -1;
+	else if (len < sizeof(*rcd)) {
+		rc = -EIO;
 		goto out;
 	}
 	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
 		goto skip;
 
+	*buf = kmalloc(len, GFP_KERNEL);
+	if (*buf == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	memcpy(*buf, rcd->data, len - sizeof(*rcd));
 	*id = record_id;
 	if (uuid_le_cmp(rcd->sec_hdr.section_type,
 			CPER_SECTION_TYPE_DMESG) == 0)
@@ -1037,6 +1049,7 @@ skip:
 	time->tv_nsec = 0;
 
 out:
+	kfree(rcd);
 	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 8370f72d87ff..a54a6b972ced 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -457,7 +457,8 @@ static int efi_pstore_close(struct pstore_info *psi)
 }
 
 static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
-			       struct timespec *timespec, struct pstore_info *psi)
+			       struct timespec *timespec,
+			       char **buf, struct pstore_info *psi)
 {
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	struct efivars *efivars = psi->data;
@@ -478,7 +479,11 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 				timespec->tv_nsec = 0;
 				get_var_data_locked(efivars, &efivars->walk_entry->var);
 				size = efivars->walk_entry->var.DataSize;
-				memcpy(psi->buf, efivars->walk_entry->var.Data, size);
+				*buf = kmalloc(size, GFP_KERNEL);
+				if (*buf == NULL)
+					return -ENOMEM;
+				memcpy(*buf, efivars->walk_entry->var.Data,
+				       size);
 				efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
 					           struct efivar_entry, list);
 				return size;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 2bd620f0d796..57bbf9078ac8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -167,6 +167,7 @@ int pstore_register(struct pstore_info *psi)
 	}
 
 	psinfo = psi;
+	mutex_init(&psinfo->read_mutex);
 	spin_unlock(&pstore_lock);
 
 	if (owner && !try_module_get(owner)) {
@@ -195,30 +196,32 @@ EXPORT_SYMBOL_GPL(pstore_register);
 void pstore_get_records(int quiet)
 {
 	struct pstore_info *psi = psinfo;
+	char			*buf = NULL;
 	ssize_t			size;
 	u64			id;
 	enum pstore_type_id	type;
 	struct timespec		time;
 	int			failed = 0, rc;
-	unsigned long		flags;
 
 	if (!psi)
 		return;
 
-	spin_lock_irqsave(&psinfo->buf_lock, flags);
+	mutex_lock(&psi->read_mutex);
 	rc = psi->open(psi);
 	if (rc)
 		goto out;
 
-	while ((size = psi->read(&id, &type, &time, psi)) > 0) {
-		rc = pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
+	while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
+		rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size,
 				  time, psi);
+		kfree(buf);
+		buf = NULL;
 		if (rc && (rc != -EEXIST || !quiet))
 			failed++;
 	}
 	psi->close(psi);
 out:
-	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+	mutex_unlock(&psi->read_mutex);
 
 	if (failed)
 		printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ea567321ae3c..2ca8cde5459d 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -35,10 +35,12 @@ struct pstore_info {
 	spinlock_t	buf_lock;	/* serialize access to 'buf' */
 	char		*buf;
 	size_t		bufsize;
+	struct mutex	read_mutex;	/* serialize open/read/close */
 	int		(*open)(struct pstore_info *psi);
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
-			struct timespec *time, struct pstore_info *psi);
+			struct timespec *time, char **buf,
+			struct pstore_info *psi);
 	int		(*write)(enum pstore_type_id type, u64 *id,
 			unsigned int part, size_t size, struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
-- 
cgit v1.2.3


From 67820021dc9c8da37f773025190280f55f3626d4 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 23 Nov 2011 11:33:07 +0100
Subject: i2c: Delete ANY_I2C_BUS

Last piece of code using ANY_I2C_BUS was deleted almost 2 years ago,
so ANY_I2C_BUS can go away as well.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index a81bf6d23b3e..07d103a06d64 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -432,9 +432,6 @@ void i2c_unlock_adapter(struct i2c_adapter *);
 /* Internal numbers to terminate lists */
 #define I2C_CLIENT_END		0xfffeU
 
-/* The numbers to use to set I2C bus address */
-#define ANY_I2C_BUS		0xffff
-
 /* Construct an I2C_CLIENT_END-terminated array of i2c addresses */
 #define I2C_ADDRS(addr, addrs...) \
 	((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END })
-- 
cgit v1.2.3


From 780dc9ba4eb682a89be48d5b814feae6722a19e0 Mon Sep 17 00:00:00 2001
From: Afzal Mohammed <afzal@ti.com>
Date: Tue, 8 Nov 2011 18:54:10 +0530
Subject: regulator: TPS65910: Fix VDD1/2 voltage selector count

Count of selector voltage is required for regulator_set_voltage
to work via set_voltage_sel. VDD1/2 currently have it as zero,
so regulator_set_voltage won't work for VDD1/2.
Update count (n_voltages) for VDD1/2.

Output Voltage = (step value * 12.5 mV + 562.5 mV) * gain

With above expr, number of voltages that can be selected is
step value count * gain count

constant for gain count will be called VDD1_2_NUM_VOLT_COARSE

existing constant for step value count is VDD1_2_NUM_VOLTS,
use VDD1_2_NUM_VOLT_FINE instead to make clear that step value
is not the only component in deciding selectable voltage count

Signed-off-by: Afzal Mohammed <afzal@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65910-regulator.c | 14 ++++++++------
 include/linux/mfd/tps65910.h           |  3 ++-
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 66d2d60b436a..b552aae55b41 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -664,10 +664,10 @@ static int tps65910_set_voltage_dcdc(struct regulator_dev *dev,
 
 	switch (id) {
 	case TPS65910_REG_VDD1:
-		dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1;
+		dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
 		if (dcdc_mult == 1)
 			dcdc_mult--;
-		vsel = (selector % VDD1_2_NUM_VOLTS) + 3;
+		vsel = (selector % VDD1_2_NUM_VOLT_FINE) + 3;
 
 		tps65910_modify_bits(pmic, TPS65910_VDD1,
 				(dcdc_mult << VDD1_VGAIN_SEL_SHIFT),
@@ -675,10 +675,10 @@ static int tps65910_set_voltage_dcdc(struct regulator_dev *dev,
 		tps65910_reg_write(pmic, TPS65910_VDD1_OP, vsel);
 		break;
 	case TPS65910_REG_VDD2:
-		dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1;
+		dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
 		if (dcdc_mult == 1)
 			dcdc_mult--;
-		vsel = (selector % VDD1_2_NUM_VOLTS) + 3;
+		vsel = (selector % VDD1_2_NUM_VOLT_FINE) + 3;
 
 		tps65910_modify_bits(pmic, TPS65910_VDD2,
 				(dcdc_mult << VDD2_VGAIN_SEL_SHIFT),
@@ -756,9 +756,9 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev,
 	switch (id) {
 	case TPS65910_REG_VDD1:
 	case TPS65910_REG_VDD2:
-		mult = (selector / VDD1_2_NUM_VOLTS) + 1;
+		mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
 		volt = VDD1_2_MIN_VOLT +
-				(selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET;
+				(selector % VDD1_2_NUM_VOLT_FINE) * VDD1_2_OFFSET;
 		break;
 	case TPS65911_REG_VDDCTRL:
 		volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET);
@@ -947,6 +947,8 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 
 		if (i == TPS65910_REG_VDD1 || i == TPS65910_REG_VDD2) {
 			pmic->desc[i].ops = &tps65910_ops_dcdc;
+			pmic->desc[i].n_voltages = VDD1_2_NUM_VOLT_FINE *
+							VDD1_2_NUM_VOLT_COARSE;
 		} else if (i == TPS65910_REG_VDD3) {
 			if (tps65910_chip_id(tps65910) == TPS65910)
 				pmic->desc[i].ops = &tps65910_ops_vdd3;
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 82b4c8801a4f..8bf2cb9502dd 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -243,7 +243,8 @@
 
 
 /*Registers VDD1, VDD2 voltage values definitions */
-#define VDD1_2_NUM_VOLTS				73
+#define VDD1_2_NUM_VOLT_FINE				73
+#define VDD1_2_NUM_VOLT_COARSE				3
 #define VDD1_2_MIN_VOLT					6000
 #define VDD1_2_OFFSET					125
 
-- 
cgit v1.2.3


From fe1a7fe2c4456679b3402f04268bdfafca7b127a Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Tue, 15 Nov 2011 16:17:18 +0200
Subject: virtio-mmio: Correct the name of the guest features selector

Guest features selector spelling mistake.

Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_mmio.c | 2 +-
 include/linux/virtio_mmio.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index acc5e43c373e..7317dc2ec426 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -118,7 +118,7 @@ static void vm_finalize_features(struct virtio_device *vdev)
 	vring_transport_features(vdev);
 
 	for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
-		writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SET);
+		writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
 		writel(vdev->features[i],
 				vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
 	}
diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h
index 27c7edefbc86..5c7b6f0daef8 100644
--- a/include/linux/virtio_mmio.h
+++ b/include/linux/virtio_mmio.h
@@ -63,7 +63,7 @@
 #define VIRTIO_MMIO_GUEST_FEATURES	0x020
 
 /* Activated features set selector - Write Only */
-#define VIRTIO_MMIO_GUEST_FEATURES_SET	0x024
+#define VIRTIO_MMIO_GUEST_FEATURES_SEL	0x024
 
 /* Guest's memory page size in bytes - Write Only */
 #define VIRTIO_MMIO_GUEST_PAGE_SIZE	0x028
-- 
cgit v1.2.3


From e6af578c5305be693a1bc7f4dc7b51dd82d41425 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 17 Nov 2011 17:41:15 +0200
Subject: virtio-pci: make reset operation safer

virtio pci device reset actually just does an I/O
write, which in PCI is really posted, that is it
can complete on CPU before the device has received it.

Further, interrupts might have been pending on
another CPU, so device callback might get invoked after reset.

This conflicts with how drivers use reset, which is typically:
	reset
	unregister
a callback running after reset completed can race with
unregister, potentially leading to use after free bugs.

Fix by flushing out the write, and flushing pending interrupts.

This assumes that device is never reset from
its vq/config callbacks, or in parallel with being
added/removed, document this assumption.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c   | 18 ++++++++++++++++++
 include/linux/virtio_config.h |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 3d1bf41e8892..03d1984bd363 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -169,11 +169,29 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
 	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 }
 
+/* wait for pending irq handlers */
+static void vp_synchronize_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled)
+		synchronize_irq(vp_dev->pci_dev->irq);
+
+	for (i = 0; i < vp_dev->msix_vectors; ++i)
+		synchronize_irq(vp_dev->msix_entries[i].vector);
+}
+
 static void vp_reset(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
 	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	/* Flush out the status write, and flush in device writes,
+	 * including MSi-X interrupts, if any. */
+	ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	/* Flush pending VQ/configuration callbacks. */
+	vp_synchronize_vectors(vdev);
 }
 
 /* the notify function used when creating a virt queue */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index add4790b21fe..e9e72bda1b72 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -85,6 +85,8 @@
  * @reset: reset the device
  *	vdev: the virtio device
  *	After this, status and feature negotiation must be done again
+ *	Device must not be reset from its vq/config callbacks, or in
+ *	parallel with being added/removed.
  * @find_vqs: find virtqueues and instantiate them.
  *	vdev: the virtio_device
  *	nvqs: the number of virtqueues to find
-- 
cgit v1.2.3


From f7bc83d87d242917ca0ee041ed509f57f361dd56 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Nov 2011 21:20:32 +0100
Subject: PM: Update comments describing device power management callbacks

The comments describing device power management callbacks in
include/pm.h are outdated and somewhat confusing, so make them
reflect the reality more accurately.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm.h | 229 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 134 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 5c4c8b18c8b7..3f3ed83a9aa5 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -54,118 +54,145 @@ typedef struct pm_message {
 /**
  * struct dev_pm_ops - device PM callbacks
  *
- * Several driver power state transitions are externally visible, affecting
+ * Several device power state transitions are externally visible, affecting
  * the state of pending I/O queues and (for drivers that touch hardware)
  * interrupts, wakeups, DMA, and other hardware state.  There may also be
- * internal transitions to various low power modes, which are transparent
+ * internal transitions to various low-power modes which are transparent
  * to the rest of the driver stack (such as a driver that's ON gating off
  * clocks which are not in active use).
  *
- * The externally visible transitions are handled with the help of the following
- * callbacks included in this structure:
- *
- * @prepare: Prepare the device for the upcoming transition, but do NOT change
- *	its hardware state.  Prevent new children of the device from being
- *	registered after @prepare() returns (the driver's subsystem and
- *	generally the rest of the kernel is supposed to prevent new calls to the
- *	probe method from being made too once @prepare() has succeeded).  If
- *	@prepare() detects a situation it cannot handle (e.g. registration of a
- *	child already in progress), it may return -EAGAIN, so that the PM core
- *	can execute it once again (e.g. after the new child has been registered)
- *	to recover from the race condition.  This method is executed for all
- *	kinds of suspend transitions and is followed by one of the suspend
- *	callbacks: @suspend(), @freeze(), or @poweroff().
- *	The PM core executes @prepare() for all devices before starting to
- *	execute suspend callbacks for any of them, so drivers may assume all of
- *	the other devices to be present and functional while @prepare() is being
- *	executed.  In particular, it is safe to make GFP_KERNEL memory
- *	allocations from within @prepare().  However, drivers may NOT assume
- *	anything about the availability of the user space at that time and it
- *	is not correct to request firmware from within @prepare() (it's too
- *	late to do that).  [To work around this limitation, drivers may
- *	register suspend and hibernation notifiers that are executed before the
- *	freezing of tasks.]
+ * The externally visible transitions are handled with the help of callbacks
+ * included in this structure in such a way that two levels of callbacks are
+ * involved.  First, the PM core executes callbacks provided by PM domains,
+ * device types, classes and bus types.  They are the subsystem-level callbacks
+ * supposed to execute callbacks provided by device drivers, although they may
+ * choose not to do that.  If the driver callbacks are executed, they have to
+ * collaborate with the subsystem-level callbacks to achieve the goals
+ * appropriate for the given system transition, given transition phase and the
+ * subsystem the device belongs to.
+ *
+ * @prepare: The principal role of this callback is to prevent new children of
+ *	the device from being registered after it has returned (the driver's
+ *	subsystem and generally the rest of the kernel is supposed to prevent
+ *	new calls to the probe method from being made too once @prepare() has
+ *	succeeded).  If @prepare() detects a situation it cannot handle (e.g.
+ *	registration of a child already in progress), it may return -EAGAIN, so
+ *	that the PM core can execute it once again (e.g. after a new child has
+ *	been registered) to recover from the race condition.
+ *	This method is executed for all kinds of suspend transitions and is
+ *	followed by one of the suspend callbacks: @suspend(), @freeze(), or
+ *	@poweroff().  The PM core executes subsystem-level @prepare() for all
+ *	devices before starting to invoke suspend callbacks for any of them, so
+ *	generally devices may be assumed to be functional or to respond to
+ *	runtime resume requests while @prepare() is being executed.  However,
+ *	device drivers may NOT assume anything about the availability of user
+ *	space at that time and it is NOT valid to request firmware from within
+ *	@prepare() (it's too late to do that).  It also is NOT valid to allocate
+ *	substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
+ *	[To work around these limitations, drivers may register suspend and
+ *	hibernation notifiers to be executed before the freezing of tasks.]
  *
  * @complete: Undo the changes made by @prepare().  This method is executed for
  *	all kinds of resume transitions, following one of the resume callbacks:
  *	@resume(), @thaw(), @restore().  Also called if the state transition
- *	fails before the driver's suspend callback (@suspend(), @freeze(),
- *	@poweroff()) can be executed (e.g. if the suspend callback fails for one
+ *	fails before the driver's suspend callback: @suspend(), @freeze() or
+ *	@poweroff(), can be executed (e.g. if the suspend callback fails for one
  *	of the other devices that the PM core has unsuccessfully attempted to
  *	suspend earlier).
- *	The PM core executes @complete() after it has executed the appropriate
- *	resume callback for all devices.
+ *	The PM core executes subsystem-level @complete() after it has executed
+ *	the appropriate resume callbacks for all devices.
  *
  * @suspend: Executed before putting the system into a sleep state in which the
- *	contents of main memory are preserved.  Quiesce the device, put it into
- *	a low power state appropriate for the upcoming system state (such as
- *	PCI_D3hot), and enable wakeup events as appropriate.
+ *	contents of main memory are preserved.  The exact action to perform
+ *	depends on the device's subsystem (PM domain, device type, class or bus
+ *	type), but generally the device must be quiescent after subsystem-level
+ *	@suspend() has returned, so that it doesn't do any I/O or DMA.
+ *	Subsystem-level @suspend() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @resume: Executed after waking the system up from a sleep state in which the
- *	contents of main memory were preserved.  Put the device into the
- *	appropriate state, according to the information saved in memory by the
- *	preceding @suspend().  The driver starts working again, responding to
- *	hardware events and software requests.  The hardware may have gone
- *	through a power-off reset, or it may have maintained state from the
- *	previous suspend() which the driver may rely on while resuming.  On most
- *	platforms, there are no restrictions on availability of resources like
- *	clocks during @resume().
+ *	contents of main memory were preserved.  The exact action to perform
+ *	depends on the device's subsystem, but generally the driver is expected
+ *	to start working again, responding to hardware events and software
+ *	requests (the device itself may be left in a low-power state, waiting
+ *	for a runtime resume to occur).  The state of the device at the time its
+ *	driver's @resume() callback is run depends on the platform and subsystem
+ *	the device belongs to.  On most platforms, there are no restrictions on
+ *	availability of resources like clocks during @resume().
+ *	Subsystem-level @resume() is executed for all devices after invoking
+ *	subsystem-level @resume_noirq() for all of them.
  *
  * @freeze: Hibernation-specific, executed before creating a hibernation image.
- *	Quiesce operations so that a consistent image can be created, but do NOT
- *	otherwise put the device into a low power device state and do NOT emit
- *	system wakeup events.  Save in main memory the device settings to be
- *	used by @restore() during the subsequent resume from hibernation or by
- *	the subsequent @thaw(), if the creation of the image or the restoration
- *	of main memory contents from it fails.
+ *	Analogous to @suspend(), but it should not enable the device to signal
+ *	wakeup events or change its power state.  The majority of subsystems
+ *	(with the notable exception of the PCI bus type) expect the driver-level
+ *	@freeze() to save the device settings in memory to be used by @restore()
+ *	during the subsequent resume from hibernation.
+ *	Subsystem-level @freeze() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @thaw: Hibernation-specific, executed after creating a hibernation image OR
- *	if the creation of the image fails.  Also executed after a failing
+ *	if the creation of an image has failed.  Also executed after a failing
  *	attempt to restore the contents of main memory from such an image.
  *	Undo the changes made by the preceding @freeze(), so the device can be
  *	operated in the same way as immediately before the call to @freeze().
+ *	Subsystem-level @thaw() is executed for all devices after invoking
+ *	subsystem-level @thaw_noirq() for all of them.  It also may be executed
+ *	directly after @freeze() in case of a transition error.
  *
  * @poweroff: Hibernation-specific, executed after saving a hibernation image.
- *	Quiesce the device, put it into a low power state appropriate for the
- *	upcoming system state (such as PCI_D3hot), and enable wakeup events as
- *	appropriate.
+ *	Analogous to @suspend(), but it need not save the device's settings in
+ *	memory.
+ *	Subsystem-level @poweroff() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @restore: Hibernation-specific, executed after restoring the contents of main
- *	memory from a hibernation image.  Driver starts working again,
- *	responding to hardware events and software requests.  Drivers may NOT
- *	make ANY assumptions about the hardware state right prior to @restore().
- *	On most platforms, there are no restrictions on availability of
- *	resources like clocks during @restore().
- *
- * @suspend_noirq: Complete the operations of ->suspend() by carrying out any
- *	actions required for suspending the device that need interrupts to be
- *	disabled
- *
- * @resume_noirq: Prepare for the execution of ->resume() by carrying out any
- *	actions required for resuming the device that need interrupts to be
- *	disabled
- *
- * @freeze_noirq: Complete the operations of ->freeze() by carrying out any
- *	actions required for freezing the device that need interrupts to be
- *	disabled
- *
- * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any
- *	actions required for thawing the device that need interrupts to be
- *	disabled
- *
- * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any
- *	actions required for handling the device that need interrupts to be
- *	disabled
- *
- * @restore_noirq: Prepare for the execution of ->restore() by carrying out any
- *	actions required for restoring the operations of the device that need
- *	interrupts to be disabled
+ *	memory from a hibernation image, analogous to @resume().
+ *
+ * @suspend_noirq: Complete the actions started by @suspend().  Carry out any
+ *	additional operations required for suspending the device that might be
+ *	racing with its driver's interrupt handler, which is guaranteed not to
+ *	run while @suspend_noirq() is being executed.
+ *	It generally is expected that the device will be in a low-power state
+ *	(appropriate for the target system sleep state) after subsystem-level
+ *	@suspend_noirq() has returned successfully.  If the device can generate
+ *	system wakeup signals and is enabled to wake up the system, it should be
+ *	configured to do so at that time.  However, depending on the platform
+ *	and device's subsystem, @suspend() may be allowed to put the device into
+ *	the low-power state and configure it to generate wakeup signals, in
+ *	which case it generally is not necessary to define @suspend_noirq().
+ *
+ * @resume_noirq: Prepare for the execution of @resume() by carrying out any
+ *	operations required for resuming the device that might be racing with
+ *	its driver's interrupt handler, which is guaranteed not to run while
+ *	@resume_noirq() is being executed.
+ *
+ * @freeze_noirq: Complete the actions started by @freeze().  Carry out any
+ *	additional operations required for freezing the device that might be
+ *	racing with its driver's interrupt handler, which is guaranteed not to
+ *	run while @freeze_noirq() is being executed.
+ *	The power state of the device should not be changed by either @freeze()
+ *	or @freeze_noirq() and it should not be configured to signal system
+ *	wakeup by any of these callbacks.
+ *
+ * @thaw_noirq: Prepare for the execution of @thaw() by carrying out any
+ *	operations required for thawing the device that might be racing with its
+ *	driver's interrupt handler, which is guaranteed not to run while
+ *	@thaw_noirq() is being executed.
+ *
+ * @poweroff_noirq: Complete the actions started by @poweroff().  Analogous to
+ *	@suspend_noirq(), but it need not save the device's settings in memory.
+ *
+ * @restore_noirq: Prepare for the execution of @restore() by carrying out any
+ *	operations required for thawing the device that might be racing with its
+ *	driver's interrupt handler, which is guaranteed not to run while
+ *	@restore_noirq() is being executed.  Analogous to @resume_noirq().
  *
  * All of the above callbacks, except for @complete(), return error codes.
  * However, the error codes returned by the resume operations, @resume(),
- * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do
+ * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do
  * not cause the PM core to abort the resume transition during which they are
- * returned.  The error codes returned in that cases are only printed by the PM
+ * returned.  The error codes returned in those cases are only printed by the PM
  * core to the system logs for debugging purposes.  Still, it is recommended
  * that drivers only return error codes from their resume methods in case of an
  * unrecoverable failure (i.e. when the device being handled refuses to resume
@@ -174,31 +201,43 @@ typedef struct pm_message {
  * their children.
  *
  * It is allowed to unregister devices while the above callbacks are being
- * executed.  However, it is not allowed to unregister a device from within any
- * of its own callbacks.
+ * executed.  However, a callback routine must NOT try to unregister the device
+ * it was called for, although it may unregister children of that device (for
+ * example, if it detects that a child was unplugged while the system was
+ * asleep).
+ *
+ * Refer to Documentation/power/devices.txt for more information about the role
+ * of the above callbacks in the system suspend process.
  *
- * There also are the following callbacks related to run-time power management
- * of devices:
+ * There also are callbacks related to runtime power management of devices.
+ * Again, these callbacks are executed by the PM core only for subsystems
+ * (PM domains, device types, classes and bus types) and the subsystem-level
+ * callbacks are supposed to invoke the driver callbacks.  Moreover, the exact
+ * actions to be performed by a device driver's callbacks generally depend on
+ * the platform and subsystem the device belongs to.
  *
  * @runtime_suspend: Prepare the device for a condition in which it won't be
  *	able to communicate with the CPU(s) and RAM due to power management.
- *	This need not mean that the device should be put into a low power state.
+ *	This need not mean that the device should be put into a low-power state.
  *	For example, if the device is behind a link which is about to be turned
  *	off, the device may remain at full power.  If the device does go to low
- *	power and is capable of generating run-time wake-up events, remote
- *	wake-up (i.e., a hardware mechanism allowing the device to request a
- *	change of its power state via a wake-up event, such as PCI PME) should
- *	be enabled for it.
+ *	power and is capable of generating runtime wakeup events, remote wakeup
+ *	(i.e., a hardware mechanism allowing the device to request a change of
+ *	its power state via an interrupt) should be enabled for it.
  *
  * @runtime_resume: Put the device into the fully active state in response to a
- *	wake-up event generated by hardware or at the request of software.  If
- *	necessary, put the device into the full power state and restore its
+ *	wakeup event generated by hardware or at the request of software.  If
+ *	necessary, put the device into the full-power state and restore its
  *	registers, so that it is fully operational.
  *
- * @runtime_idle: Device appears to be inactive and it might be put into a low
- *	power state if all of the necessary conditions are satisfied.  Check
+ * @runtime_idle: Device appears to be inactive and it might be put into a
+ *	low-power state if all of the necessary conditions are satisfied.  Check
  *	these conditions and handle the device as appropriate, possibly queueing
  *	a suspend request for it.  The return value is ignored by the PM core.
+ *
+ * Refer to Documentation/power/runtime_pm.txt for more information about the
+ * role of the above callbacks in device runtime power management.
+ *
  */
 
 struct dev_pm_ops {
-- 
cgit v1.2.3


From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 27 Nov 2011 21:14:46 +0000
Subject: net: Fix corruption in /proc/*/net/dev_mcast

I just hit this during my testing. Isn't there another bug lurking?

BUG kmalloc-8: Redzone overwritten

INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc
INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896
	.__kmalloc+0x1e0/0x2d0
	.__seq_open_private+0x30/0xa0
	.seq_open_net+0x60/0xe0
	.dev_mc_seq_open+0x4c/0x70
	.proc_reg_open+0xd8/0x260
	.__dentry_open.clone.11+0x2b8/0x400
	.do_last+0xf4/0x950
	.path_openat+0xf8/0x480
	.do_filp_open+0x48/0xc0
	.do_sys_open+0x140/0x250
	syscall_exit+0x0/0x40

dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates
sizeof(struct seq_net_private) of private data, whereas it expects
sizeof(struct dev_iter_state):

struct dev_iter_state {
	struct seq_net_private p;
	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
};

Create dev_seq_open_ops and use it so we don't have to expose
struct dev_iter_state.

[ Problem added by commit f04565ddf52e4 (dev: use name hash for
  dev_seq_ops) -Eric ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 6 ++++++
 net/core/dev_addr_lists.c | 3 +--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb5867cff7..a82ad4dd306a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2536,6 +2536,8 @@ extern void		net_disable_timestamp(void);
 extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
 extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 extern void dev_seq_stop(struct seq_file *seq, void *v);
+extern int dev_seq_open_ops(struct inode *inode, struct file *file,
+			    const struct seq_operations *ops);
 #endif
 
 extern int netdev_class_create_file(struct class_attribute *class_attr);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..1482eea0bbf0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4282,6 +4282,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 			    sizeof(struct dev_iter_state));
 }
 
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
+{
+	return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
 static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..febba516db62 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
+	return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
 }
 
 static const struct file_operations dev_mc_seq_fops = {
-- 
cgit v1.2.3