From 7b7232f3fb5ecd7c30cb52df368070cc5f5ca614 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 1 Feb 2006 21:06:49 -0600
Subject: [SCSI] iscsi update: cleanup iscsi class interface

From:
michaelc@cs.wisc.edu
fujita.tomonori@lab.ntt.co.jp
da-x@monatomic.org

and err path fixup from:
ogerlitz@voltaire.com

This patch cleans up the iscsi class interface by having the lld and class
pass an iscsi_cls_session or iscsi_cls_conn between each other when
the function is used by HW and SW iscsi llds. This way the lld
does not have to remember whether it has to send a handle or a pointer,
or whether that handle or pointer refers to a connection, session or host.

This also has the class verify the session handle that gets passed from
userspace instead of using the pointer passed into the kernel directly.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Alex Aizman <itn780@yahoo.com>
Signed-off-by: Dmitry Yusupov <dmitry_yus@yahoo.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/scsi/iscsi_if.h             |  3 ---
 include/scsi/scsi_transport_iscsi.h | 34 ++++++++++++++++++----------------
 2 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 3e5cb5ab2d34..e5618b90996e 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -163,9 +163,6 @@ enum iscsi_param {
 };
 #define ISCSI_PARAM_MAX			14
 
-typedef uint64_t iscsi_sessionh_t;	/* iSCSI Data-Path session handle */
-typedef uint64_t iscsi_connh_t;		/* iSCSI Data-Path connection handle */
-
 #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle)
 #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr)
 #define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata))
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 16602a547a63..b41cf077e54b 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -63,25 +63,28 @@ struct iscsi_transport {
 	int max_lun;
 	unsigned int max_conn;
 	unsigned int max_cmd_len;
-	struct Scsi_Host *(*create_session) (struct scsi_transport_template *t,
-					     uint32_t initial_cmdsn);
-	void (*destroy_session) (struct Scsi_Host *shost);
-	struct iscsi_cls_conn *(*create_conn) (struct Scsi_Host *shost,
+	struct iscsi_cls_session *(*create_session)
+		(struct scsi_transport_template *t, uint32_t sn, uint32_t *sid);
+	void (*destroy_session) (struct iscsi_cls_session *session);
+	struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess,
 				uint32_t cid);
-	int (*bind_conn) (iscsi_sessionh_t session, iscsi_connh_t conn,
+	int (*bind_conn) (struct iscsi_cls_session *session,
+			  struct iscsi_cls_conn *cls_conn,
 			  uint32_t transport_fd, int is_leading);
-	int (*start_conn) (iscsi_connh_t conn);
-	void (*stop_conn) (iscsi_connh_t conn, int flag);
+	int (*start_conn) (struct iscsi_cls_conn *conn);
+	void (*stop_conn) (struct iscsi_cls_conn *conn, int flag);
 	void (*destroy_conn) (struct iscsi_cls_conn *conn);
-	int (*set_param) (iscsi_connh_t conn, enum iscsi_param param,
+	int (*set_param) (struct iscsi_cls_conn *conn, enum iscsi_param param,
 			  uint32_t value);
-	int (*get_conn_param) (void *conndata, enum iscsi_param param,
+	int (*get_conn_param) (struct iscsi_cls_conn *conn,
+			       enum iscsi_param param,
 			       uint32_t *value);
-	int (*get_session_param) (struct Scsi_Host *shost,
+	int (*get_session_param) (struct iscsi_cls_session *session,
 				  enum iscsi_param param, uint32_t *value);
-	int (*send_pdu) (iscsi_connh_t conn, struct iscsi_hdr *hdr,
+	int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 			 char *data, uint32_t data_size);
-	void (*get_stats) (iscsi_connh_t conn, struct iscsi_stats *stats);
+	void (*get_stats) (struct iscsi_cls_conn *conn,
+			   struct iscsi_stats *stats);
 };
 
 /*
@@ -93,15 +96,14 @@ extern int iscsi_unregister_transport(struct iscsi_transport *tt);
 /*
  * control plane upcalls
  */
-extern void iscsi_conn_error(iscsi_connh_t conn, enum iscsi_err error);
-extern int iscsi_recv_pdu(iscsi_connh_t conn, struct iscsi_hdr *hdr,
+extern void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error);
+extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 			  char *data, uint32_t data_size);
 
 struct iscsi_cls_conn {
 	struct list_head conn_list;	/* item in connlist */
 	void *dd_data;			/* LLD private data */
 	struct iscsi_transport *transport;
-	iscsi_connh_t connh;
 	int active;			/* must be accessed with the connlock */
 	struct device dev;		/* sysfs transport/container device */
 	struct mempool_zone *z_error;
@@ -113,7 +115,7 @@ struct iscsi_cls_conn {
 	container_of(_dev, struct iscsi_cls_conn, dev)
 
 struct iscsi_cls_session {
-	struct list_head list;	/* item in session_list */
+	struct list_head sess_list;		/* item in session_list */
 	struct iscsi_transport *transport;
 	struct device dev;	/* sysfs transport/container device */
 };
-- 
cgit v1.2.3


From 40ad7a6afc53217ad95b5ae2221e42d7655e057b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 12 Feb 2006 23:30:11 -0800
Subject: [SPARC]: sys_newfstatat --> sys_fstatat64

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/systbls.S       |  2 +-
 arch/sparc64/kernel/sys_sparc32.c | 21 +++++++++++++++++++++
 arch/sparc64/kernel/systbls.S     |  4 ++--
 include/asm-sparc/unistd.h        |  2 +-
 include/asm-sparc64/unistd.h      |  2 +-
 5 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
index c0314705d73a..768de64b371f 100644
--- a/arch/sparc/kernel/systbls.S
+++ b/arch/sparc/kernel/systbls.S
@@ -76,7 +76,7 @@ sys_call_table:
 /*270*/	.long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
 /*275*/	.long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
 /*280*/	.long sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat
-/*285*/	.long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_newfstatat
+/*285*/	.long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
 /*290*/	.long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
 /*295*/	.long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
 
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index 9264ccbaaafa..417727bd87ba 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -428,6 +428,27 @@ asmlinkage long compat_sys_fstat64(unsigned int fd,
 	return error;
 }
 
+asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename,
+		struct compat_stat64 __user * statbuf, int flag)
+{
+	struct kstat stat;
+	int error = -EINVAL;
+
+	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	if (flag & AT_SYMLINK_NOFOLLOW)
+		error = vfs_lstat_fd(dfd, filename, &stat);
+	else
+		error = vfs_stat_fd(dfd, filename, &stat);
+
+	if (!error)
+		error = cp_compat_stat64(&stat, statbuf);
+
+out:
+	return error;
+}
+
 asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2)
 {
 	return sys_sysfs(option, arg1, arg2);
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index a19168510be2..c3adb7ac167d 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -77,7 +77,7 @@ sys_call_table32:
 /*270*/	.word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
 	.word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
 /*280*/	.word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
-	.word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_newfstatat
+	.word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64
 /*285*/	.word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
 	.word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare
 
@@ -146,7 +146,7 @@ sys_call_table:
 /*270*/	.word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
 	.word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
 /*280*/	.word sys_nis_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat
-	.word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_newfstatat
+	.word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
 /*285*/	.word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
 	.word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
 
diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h
index 0615d601a7c6..64ec640a40ee 100644
--- a/include/asm-sparc/unistd.h
+++ b/include/asm-sparc/unistd.h
@@ -305,7 +305,7 @@
 #define __NR_mknodat		286
 #define __NR_fchownat		287
 #define __NR_futimesat		288
-#define __NR_newfstatat		289
+#define __NR_fstatat64		289
 #define __NR_unlinkat		290
 #define __NR_renameat		291
 #define __NR_linkat		292
diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h
index c58ba8a096cf..a284986b1541 100644
--- a/include/asm-sparc64/unistd.h
+++ b/include/asm-sparc64/unistd.h
@@ -307,7 +307,7 @@
 #define __NR_mknodat		286
 #define __NR_fchownat		287
 #define __NR_futimesat		288
-#define __NR_newfstatat		289
+#define __NR_fstatat64		289
 #define __NR_unlinkat		290
 #define __NR_renameat		291
 #define __NR_linkat		292
-- 
cgit v1.2.3


From 56f3a40a5e7586043260669cc794e56fa58339e1 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 13 Feb 2006 11:39:57 +0100
Subject: [Bluetooth] Reduce L2CAP MTU for RFCOMM connections

This patch reduces the default L2CAP MTU for all RFCOMM connections
from 1024 to 1013 to improve the interoperability with some broken
RFCOMM implementations. To make this more flexible, the L2CAP MTU
also becomes a module parameter, so it can be changed at runtime.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/rfcomm.h |  2 +-
 net/bluetooth/rfcomm/core.c    | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index bbfac86734ec..89d743cfdfdf 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -33,7 +33,7 @@
 #define RFCOMM_DEFAULT_MTU	127
 #define RFCOMM_DEFAULT_CREDITS	7
 
-#define RFCOMM_MAX_L2CAP_MTU	1024
+#define RFCOMM_MAX_L2CAP_MTU	1013
 #define RFCOMM_MAX_CREDITS	40
 
 #define RFCOMM_SKB_HEAD_RESERVE	8
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 0d89d6434136..5b4253c61f62 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -46,13 +46,15 @@
 #include <net/bluetooth/l2cap.h>
 #include <net/bluetooth/rfcomm.h>
 
-#define VERSION "1.6"
-
 #ifndef CONFIG_BT_RFCOMM_DEBUG
 #undef  BT_DBG
 #define BT_DBG(D...)
 #endif
 
+#define VERSION "1.7"
+
+static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU;
+
 static struct task_struct *rfcomm_thread;
 
 static DECLARE_MUTEX(rfcomm_sem);
@@ -623,7 +625,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
 	/* Set L2CAP options */
 	sk = sock->sk;
 	lock_sock(sk);
-	l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU;
+	l2cap_pi(sk)->imtu = l2cap_mtu;
 	release_sock(sk);
 
 	s = rfcomm_session_add(sock, BT_BOUND);
@@ -1868,7 +1870,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
 	/* Set L2CAP options */
 	sk = sock->sk;
 	lock_sock(sk);
-	l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU;
+	l2cap_pi(sk)->imtu = l2cap_mtu;
 	release_sock(sk);
 
 	/* Start listening on the socket */
@@ -2070,6 +2072,9 @@ static void __exit rfcomm_exit(void)
 module_init(rfcomm_init);
 module_exit(rfcomm_exit);
 
+module_param(l2cap_mtu, uint, 0644);
+MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection");
+
 MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION);
 MODULE_VERSION(VERSION);
-- 
cgit v1.2.3


From 7a11c4d0635d9f6995736390b8c3346fe6f63d57 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 13 Feb 2006 15:34:11 -0800
Subject: [IRDA]: Ratelimit messages.

From: Joe Perches <joe@perches.com>

Based upon a patch by Dave Jones.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/irda/irda.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h
index 05a840837fe7..1880e46ecc9b 100644
--- a/include/net/irda/irda.h
+++ b/include/net/irda/irda.h
@@ -82,9 +82,9 @@ do { if(!(expr)) { \
 #define IRDA_ASSERT_LABEL(label)
 #endif /* CONFIG_IRDA_DEBUG */
 
-#define IRDA_WARNING(args...) printk(KERN_WARNING args)
-#define IRDA_MESSAGE(args...) printk(KERN_INFO args)
-#define IRDA_ERROR(args...)   printk(KERN_ERR args)
+#define IRDA_WARNING(args...) do { if (net_ratelimit()) printk(KERN_WARNING args); } while (0)
+#define IRDA_MESSAGE(args...) do { if (net_ratelimit()) printk(KERN_INFO args); } while (0)
+#define IRDA_ERROR(args...)   do { if (net_ratelimit()) printk(KERN_ERR args); } while (0)
 
 /*
  *  Magic numbers used by Linux-IrDA. Random numbers which must be unique to 
-- 
cgit v1.2.3


From faead26d7a06605add627f29aee73ba654ce11f9 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Tue, 14 Feb 2006 10:42:07 -0600
Subject: [PATCH] add scsi_execute_in_process_context() API

We have several points in the SCSI stack (primarily for our device
functions) where we need to guarantee process context, but (given the
place where the last reference was released) we cannot guarantee this.

This API gets around the issue by executing the function directly if
the caller has process context, but scheduling a workqueue to execute
in process context if the caller doesn't have it.  Unfortunately, it
requires memory allocation in interrupt context, but it's better than
what we had previously.  The true solution will require a bit of
re-engineering, so isn't appropriate for 2.6.16.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_lib.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/scsi/scsi.h     |  2 ++
 2 files changed, 61 insertions(+)

(limited to 'include')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4a602853a98e..4362dcde74af 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/hardirq.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_dbg.h>
@@ -2248,3 +2249,61 @@ scsi_target_unblock(struct device *dev)
 		device_for_each_child(dev, NULL, target_unblock);
 }
 EXPORT_SYMBOL_GPL(scsi_target_unblock);
+
+
+struct work_queue_work {
+	struct work_struct	work;
+	void			(*fn)(void *);
+	void			*data;
+};
+
+static void execute_in_process_context_work(void *data)
+{
+	void (*fn)(void *data);
+	struct work_queue_work *wqw = data;
+
+	fn = wqw->fn;
+	data = wqw->data;
+
+	kfree(wqw);
+
+	fn(data);
+}
+
+/**
+ * scsi_execute_in_process_context - reliably execute the routine with user context
+ * @fn:		the function to execute
+ * @data:	data to pass to the function
+ *
+ * Executes the function immediately if process context is available,
+ * otherwise schedules the function for delayed execution.
+ *
+ * Returns:	0 - function was executed
+ *		1 - function was scheduled for execution
+ *		<0 - error
+ */
+int scsi_execute_in_process_context(void (*fn)(void *data), void *data)
+{
+	struct work_queue_work *wqw;
+
+	if (!in_interrupt()) {
+		fn(data);
+		return 0;
+	}
+
+	wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC);
+
+	if (unlikely(!wqw)) {
+		printk(KERN_ERR "Failed to allocate memory\n");
+		WARN_ON(1);
+		return -ENOMEM;
+	}
+
+	INIT_WORK(&wqw->work, execute_in_process_context_work, wqw);
+	wqw->fn = fn;
+	wqw->data = data;
+	schedule_work(&wqw->work);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(scsi_execute_in_process_context);
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index c60b8ff2f5e4..9c331258bc27 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -433,4 +433,6 @@ struct scsi_lun {
 /* Used to obtain the PCI location of a device */
 #define SCSI_IOCTL_GET_PCI		0x5387
 
+int scsi_execute_in_process_context(void (*fn)(void *data), void *data);
+
 #endif /* _SCSI_SCSI_H */
-- 
cgit v1.2.3


From f32ec77b421ee15bf5a42082b60679e997c07993 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 28 Nov 2005 13:10:54 +0000
Subject: [MIPS] RM200: Give RM200 its own timex.h.

So we can get rid of config.h and the #ifdef crapola in the generic
timex.h.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/asm-mips/mach-generic/timex.h | 11 +----------
 include/asm-mips/mach-rm200/timex.h   | 13 +++++++++++++
 2 files changed, 14 insertions(+), 10 deletions(-)
 create mode 100644 include/asm-mips/mach-rm200/timex.h

(limited to 'include')

diff --git a/include/asm-mips/mach-generic/timex.h b/include/asm-mips/mach-generic/timex.h
index c6a2e5f0574a..48b4cfaa0d50 100644
--- a/include/asm-mips/mach-generic/timex.h
+++ b/include/asm-mips/mach-generic/timex.h
@@ -3,20 +3,11 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2003 by Ralf Baechle
+ * Copyright (C) 2003, 2005 by Ralf Baechle
  */
 #ifndef __ASM_MACH_GENERIC_TIMEX_H
 #define __ASM_MACH_GENERIC_TIMEX_H
 
-#include <linux/config.h>
-
-/*
- * Last remaining user of the i8254 PIC, will be converted, too ...
- */
-#ifdef CONFIG_SNI_RM200_PCI
-#define CLOCK_TICK_RATE		1193182
-#else
 #define CLOCK_TICK_RATE		500000
-#endif
 
 #endif /* __ASM_MACH_GENERIC_TIMEX_H */
diff --git a/include/asm-mips/mach-rm200/timex.h b/include/asm-mips/mach-rm200/timex.h
new file mode 100644
index 000000000000..11ff6cb0f214
--- /dev/null
+++ b/include/asm-mips/mach-rm200/timex.h
@@ -0,0 +1,13 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 2005 by Ralf Baechle
+ */
+#ifndef __ASM_MACH_RM200_TIMEX_H
+#define __ASM_MACH_RM200_TIMEX_H
+
+#define CLOCK_TICK_RATE		1193182
+
+#endif /* __ASM_MACH_RM200_TIMEX_H */
-- 
cgit v1.2.3


From 359bbd42a5a205234d5943571fc7bf946967ee59 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 9 Feb 2006 12:13:28 +0000
Subject: [MIPS] Fold non-__mips64 case into CONFIG_32BIT case.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/asm-mips/unistd.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h
index e7ff9b187783..769305d20108 100644
--- a/include/asm-mips/unistd.h
+++ b/include/asm-mips/unistd.h
@@ -1184,10 +1184,8 @@ type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
-# ifndef __mips64
-#  define __ARCH_WANT_STAT64
-# endif
 # ifdef CONFIG_32BIT
+#  define __ARCH_WANT_STAT64
 #  define __ARCH_WANT_SYS_TIME
 # endif
 # ifdef CONFIG_MIPS32_O32
-- 
cgit v1.2.3


From 41700e73995d6c814932cb55e12525bd34be1ca5 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Fri, 10 Feb 2006 00:39:06 +0900
Subject: [MIPS] Add protected_blast_icache_range, blast_icache_range, etc.

Add blast_xxx_range(), protected_blast_xxx_range() etc. for common
use.  They are built by __BUILD_BLAST_CACHE_RANGE().
Use the protected_cache_op() macro for the various protected_ routines.
The generated code should be logically the same.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/c-r4k.c        | 104 ++++++--------------------------------------
 arch/mips/mm/c-tx39.c       |  70 ++++-------------------------
 include/asm-mips/r4kcache.h |  74 +++++++++++++++++--------------
 3 files changed, 64 insertions(+), 184 deletions(-)

(limited to 'include')

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index e51c38cef88e..1b71d91e8268 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -471,61 +471,29 @@ struct flush_icache_range_args {
 static inline void local_r4k_flush_icache_range(void *args)
 {
 	struct flush_icache_range_args *fir_args = args;
-	unsigned long dc_lsize = cpu_dcache_line_size();
-	unsigned long ic_lsize = cpu_icache_line_size();
-	unsigned long sc_lsize = cpu_scache_line_size();
 	unsigned long start = fir_args->start;
 	unsigned long end = fir_args->end;
-	unsigned long addr, aend;
 
 	if (!cpu_has_ic_fills_f_dc) {
 		if (end - start > dcache_size) {
 			r4k_blast_dcache();
 		} else {
 			R4600_HIT_CACHEOP_WAR_IMPL;
-			addr = start & ~(dc_lsize - 1);
-			aend = (end - 1) & ~(dc_lsize - 1);
-
-			while (1) {
-				/* Hit_Writeback_Inv_D */
-				protected_writeback_dcache_line(addr);
-				if (addr == aend)
-					break;
-				addr += dc_lsize;
-			}
+			protected_blast_dcache_range(start, end);
 		}
 
 		if (!cpu_icache_snoops_remote_store) {
-			if (end - start > scache_size) {
+			if (end - start > scache_size)
 				r4k_blast_scache();
-			} else {
-				addr = start & ~(sc_lsize - 1);
-				aend = (end - 1) & ~(sc_lsize - 1);
-
-				while (1) {
-					/* Hit_Writeback_Inv_SD */
-					protected_writeback_scache_line(addr);
-					if (addr == aend)
-						break;
-					addr += sc_lsize;
-				}
-			}
+			else
+				protected_blast_scache_range(start, end);
 		}
 	}
 
 	if (end - start > icache_size)
 		r4k_blast_icache();
-	else {
-		addr = start & ~(ic_lsize - 1);
-		aend = (end - 1) & ~(ic_lsize - 1);
-		while (1) {
-			/* Hit_Invalidate_I */
-			protected_flush_icache_line(addr);
-			if (addr == aend)
-				break;
-			addr += ic_lsize;
-		}
-	}
+	else
+		protected_blast_icache_range(start, end);
 }
 
 static void r4k_flush_icache_range(unsigned long start, unsigned long end)
@@ -619,27 +587,14 @@ static void r4k_flush_icache_page(struct vm_area_struct *vma,
 
 static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 {
-	unsigned long end, a;
-
 	/* Catch bad driver code */
 	BUG_ON(size == 0);
 
 	if (cpu_has_subset_pcaches) {
-		unsigned long sc_lsize = cpu_scache_line_size();
-
-		if (size >= scache_size) {
+		if (size >= scache_size)
 			r4k_blast_scache();
-			return;
-		}
-
-		a = addr & ~(sc_lsize - 1);
-		end = (addr + size - 1) & ~(sc_lsize - 1);
-		while (1) {
-			flush_scache_line(a);	/* Hit_Writeback_Inv_SD */
-			if (a == end)
-				break;
-			a += sc_lsize;
-		}
+		else
+			blast_scache_range(addr, addr + size);
 		return;
 	}
 
@@ -651,17 +606,8 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	if (size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
-		unsigned long dc_lsize = cpu_dcache_line_size();
-
 		R4600_HIT_CACHEOP_WAR_IMPL;
-		a = addr & ~(dc_lsize - 1);
-		end = (addr + size - 1) & ~(dc_lsize - 1);
-		while (1) {
-			flush_dcache_line(a);	/* Hit_Writeback_Inv_D */
-			if (a == end)
-				break;
-			a += dc_lsize;
-		}
+		blast_dcache_range(addr, addr + size);
 	}
 
 	bc_wback_inv(addr, size);
@@ -669,44 +615,22 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 
 static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 {
-	unsigned long end, a;
-
 	/* Catch bad driver code */
 	BUG_ON(size == 0);
 
 	if (cpu_has_subset_pcaches) {
-		unsigned long sc_lsize = cpu_scache_line_size();
-
-		if (size >= scache_size) {
+		if (size >= scache_size)
 			r4k_blast_scache();
-			return;
-		}
-
-		a = addr & ~(sc_lsize - 1);
-		end = (addr + size - 1) & ~(sc_lsize - 1);
-		while (1) {
-			flush_scache_line(a);	/* Hit_Writeback_Inv_SD */
-			if (a == end)
-				break;
-			a += sc_lsize;
-		}
+		else
+			blast_scache_range(addr, addr + size);
 		return;
 	}
 
 	if (size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
-		unsigned long dc_lsize = cpu_dcache_line_size();
-
 		R4600_HIT_CACHEOP_WAR_IMPL;
-		a = addr & ~(dc_lsize - 1);
-		end = (addr + size - 1) & ~(dc_lsize - 1);
-		while (1) {
-			flush_dcache_line(a);	/* Hit_Writeback_Inv_D */
-			if (a == end)
-				break;
-			a += dc_lsize;
-		}
+		blast_dcache_range(addr, addr + size);
 	}
 
 	bc_inv(addr, size);
diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c
index 0a97a9434eba..7c572bea4a98 100644
--- a/arch/mips/mm/c-tx39.c
+++ b/arch/mips/mm/c-tx39.c
@@ -44,8 +44,6 @@ __asm__ __volatile__( \
 /* TX39H-style cache flush routines. */
 static void tx39h_flush_icache_all(void)
 {
-	unsigned long start = KSEG0;
-	unsigned long end = (start + icache_size);
 	unsigned long flags, config;
 
 	/* disable icache (set ICE#) */
@@ -53,33 +51,18 @@ static void tx39h_flush_icache_all(void)
 	config = read_c0_conf();
 	write_c0_conf(config & ~TX39_CONF_ICE);
 	TX39_STOP_STREAMING();
-
-	/* invalidate icache */
-	while (start < end) {
-		cache16_unroll32(start, Index_Invalidate_I);
-		start += 0x200;
-	}
-
+	blast_icache16();
 	write_c0_conf(config);
 	local_irq_restore(flags);
 }
 
 static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 {
-	unsigned long end, a;
-	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
-
 	/* Catch bad driver code */
 	BUG_ON(size == 0);
 
 	iob();
-	a = addr & ~(dc_lsize - 1);
-	end = (addr + size - 1) & ~(dc_lsize - 1);
-	while (1) {
-		invalidate_dcache_line(a); /* Hit_Invalidate_D */
-		if (a == end) break;
-		a += dc_lsize;
-	}
+	blast_inv_dcache_range(addr, addr + size);
 }
 
 
@@ -241,42 +224,21 @@ static void tx39_flush_data_cache_page(unsigned long addr)
 
 static void tx39_flush_icache_range(unsigned long start, unsigned long end)
 {
-	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
-	unsigned long addr, aend;
-
 	if (end - start > dcache_size)
 		tx39_blast_dcache();
-	else {
-		addr = start & ~(dc_lsize - 1);
-		aend = (end - 1) & ~(dc_lsize - 1);
-
-		while (1) {
-			/* Hit_Writeback_Inv_D */
-			protected_writeback_dcache_line(addr);
-			if (addr == aend)
-				break;
-			addr += dc_lsize;
-		}
-	}
+	else
+		protected_blast_dcache_range(start, end);
 
 	if (end - start > icache_size)
 		tx39_blast_icache();
 	else {
 		unsigned long flags, config;
-		addr = start & ~(dc_lsize - 1);
-		aend = (end - 1) & ~(dc_lsize - 1);
 		/* disable icache (set ICE#) */
 		local_irq_save(flags);
 		config = read_c0_conf();
 		write_c0_conf(config & ~TX39_CONF_ICE);
 		TX39_STOP_STREAMING();
-		while (1) {
-			/* Hit_Invalidate_I */
-			protected_flush_icache_line(addr);
-			if (addr == aend)
-				break;
-			addr += dc_lsize;
-		}
+		protected_blast_icache_range(start, end);
 		write_c0_conf(config);
 		local_irq_restore(flags);
 	}
@@ -311,7 +273,7 @@ static void tx39_flush_icache_page(struct vm_area_struct *vma, struct page *page
 
 static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 {
-	unsigned long end, a;
+	unsigned long end;
 
 	if (((size | addr) & (PAGE_SIZE - 1)) == 0) {
 		end = addr + size;
@@ -322,20 +284,13 @@ static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	} else if (size > dcache_size) {
 		tx39_blast_dcache();
 	} else {
-		unsigned long dc_lsize = current_cpu_data.dcache.linesz;
-		a = addr & ~(dc_lsize - 1);
-		end = (addr + size - 1) & ~(dc_lsize - 1);
-		while (1) {
-			flush_dcache_line(a); /* Hit_Writeback_Inv_D */
-			if (a == end) break;
-			a += dc_lsize;
-		}
+		blast_dcache_range(addr, addr + size);
 	}
 }
 
 static void tx39_dma_cache_inv(unsigned long addr, unsigned long size)
 {
-	unsigned long end, a;
+	unsigned long end;
 
 	if (((size | addr) & (PAGE_SIZE - 1)) == 0) {
 		end = addr + size;
@@ -346,14 +301,7 @@ static void tx39_dma_cache_inv(unsigned long addr, unsigned long size)
 	} else if (size > dcache_size) {
 		tx39_blast_dcache();
 	} else {
-		unsigned long dc_lsize = current_cpu_data.dcache.linesz;
-		a = addr & ~(dc_lsize - 1);
-		end = (addr + size - 1) & ~(dc_lsize - 1);
-		while (1) {
-			invalidate_dcache_line(a); /* Hit_Invalidate_D */
-			if (a == end) break;
-			a += dc_lsize;
-		}
+		blast_inv_dcache_range(addr, addr + size);
 	}
 }
 
diff --git a/include/asm-mips/r4kcache.h b/include/asm-mips/r4kcache.h
index cc53196efa40..9632c27dad15 100644
--- a/include/asm-mips/r4kcache.h
+++ b/include/asm-mips/r4kcache.h
@@ -14,6 +14,7 @@
 
 #include <asm/asm.h>
 #include <asm/cacheops.h>
+#include <asm/cpu-features.h>
 
 /*
  * This macro return a properly sign-extended address suitable as base address
@@ -78,22 +79,25 @@ static inline void flush_scache_line(unsigned long addr)
 	cache_op(Hit_Writeback_Inv_SD, addr);
 }
 
+#define protected_cache_op(op,addr)				\
+	__asm__ __volatile__(					\
+	"	.set	push			\n"		\
+	"	.set	noreorder		\n"		\
+	"	.set	mips3			\n"		\
+	"1:	cache	%0, (%1)		\n"		\
+	"2:	.set	pop			\n"		\
+	"	.section __ex_table,\"a\"	\n"		\
+	"	"STR(PTR)" 1b, 2b		\n"		\
+	"	.previous"					\
+	:							\
+	: "i" (op), "r" (addr))
+
 /*
  * The next two are for badland addresses like signal trampolines.
  */
 static inline void protected_flush_icache_line(unsigned long addr)
 {
-	__asm__ __volatile__(
-		"	.set	push			\n"
-		"	.set	noreorder		\n"
-		"	.set	mips3			\n"
-		"1:	cache	%0, (%1)		\n"
-		"2:	.set	pop			\n"
-		"	.section __ex_table,\"a\"	\n"
-		"	"STR(PTR)" 1b, 2b		\n"
-		"	.previous"
-		:
-		: "i" (Hit_Invalidate_I), "r" (addr));
+	protected_cache_op(Hit_Invalidate_I, addr);
 }
 
 /*
@@ -104,32 +108,12 @@ static inline void protected_flush_icache_line(unsigned long addr)
  */
 static inline void protected_writeback_dcache_line(unsigned long addr)
 {
-	__asm__ __volatile__(
-		"	.set	push			\n"
-		"	.set	noreorder		\n"
-		"	.set	mips3			\n"
-		"1:	cache	%0, (%1)		\n"
-		"2:	.set	pop			\n"
-		"	.section __ex_table,\"a\"	\n"
-		"	"STR(PTR)" 1b, 2b		\n"
-		"	.previous"
-		:
-		: "i" (Hit_Writeback_Inv_D), "r" (addr));
+	protected_cache_op(Hit_Writeback_Inv_D, addr);
 }
 
 static inline void protected_writeback_scache_line(unsigned long addr)
 {
-	__asm__ __volatile__(
-		"	.set	push			\n"
-		"	.set	noreorder		\n"
-		"	.set	mips3			\n"
-		"1:	cache	%0, (%1)		\n"
-		"2:	.set	pop			\n"
-		"	.section __ex_table,\"a\"	\n"
-		"	"STR(PTR)" 1b, 2b		\n"
-		"	.previous"
-		:
-		: "i" (Hit_Writeback_Inv_SD), "r" (addr));
+	protected_cache_op(Hit_Writeback_Inv_SD, addr);
 }
 
 /*
@@ -295,4 +279,28 @@ __BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64)
 __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
 __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
 
+/* build blast_xxx_range, protected_blast_xxx_range */
+#define __BUILD_BLAST_CACHE_RANGE(pfx, desc, hitop, prot) \
+static inline void prot##blast_##pfx##cache##_range(unsigned long start, \
+						    unsigned long end)	\
+{									\
+	unsigned long lsize = cpu_##desc##_line_size();			\
+	unsigned long addr = start & ~(lsize - 1);			\
+	unsigned long aend = (end - 1) & ~(lsize - 1);			\
+	while (1) {							\
+		prot##cache_op(hitop, addr);				\
+		if (addr == aend)					\
+			break;						\
+		addr += lsize;						\
+	}								\
+}
+
+__BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, protected_)
+__BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, protected_)
+__BUILD_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I, protected_)
+__BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, )
+__BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, )
+/* blast_inv_dcache_range */
+__BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, )
+
 #endif /* _ASM_R4KCACHE_H */
-- 
cgit v1.2.3


From 3218357c94af92478ef39163163a81e654385320 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 10 Feb 2006 01:31:24 +0000
Subject: [MIPS] More uaccess.h fixes with gcc >= 4.0.1.

From Richard Sandiford <richard@codesourcery.com>:

This patch caused a miscompilation of the restore_gp_regs() block
in restore_sigcontext().  This was in a 32-bit kernel compiled with
GCC CVS head.

restore_gp_regs() copies 64-bit user fields into 32-bit variables,
and in this combination, the new __get_user_asm_ll32() clobbers too
many registers.  It says:

/*
 * Get a long long 64 using 32 bit registers.
 */
{									\
	__asm__ __volatile__(						\
	"1:	lw	%1, (%3)				\n"	\
	"2:	lw	%D1, 4(%3)				\n"	\
	"	move	%0, $0					\n"	\
	"3:	.section	.fixup,\"ax\"			\n"	\
	"4:	li	%0, %4					\n"	\
	"	move	%1, $0					\n"	\
	"	move	%D1, $0					\n"	\
	"	j	3b					\n"	\
	"	.previous					\n"	\
	"	.section	__ex_table,\"a\"		\n"	\
	"	" __UA_ADDR "	1b, 4b				\n"	\
	"	" __UA_ADDR "	2b, 4b				\n"	\
	"	.previous					\n"	\
	: "=r" (__gu_err), "=&r" (val)					\
	: "0" (0), "r" (addr), "i" (-EFAULT));				\
}

and this requires val (%1) to be a 64-bit value.  In the case I saw,
gcc was using $3 for the 32-bit val, and wasn't expecting $4 to be
clobbered.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/asm-mips/uaccess.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-mips/uaccess.h b/include/asm-mips/uaccess.h
index 91d813a37823..7a553e9d44d3 100644
--- a/include/asm-mips/uaccess.h
+++ b/include/asm-mips/uaccess.h
@@ -266,6 +266,8 @@ do {									\
  */
 #define __get_user_asm_ll32(val, addr)					\
 {									\
+        unsigned long long __gu_tmp;					\
+									\
 	__asm__ __volatile__(						\
 	"1:	lw	%1, (%3)				\n"	\
 	"2:	lw	%D1, 4(%3)				\n"	\
@@ -280,8 +282,9 @@ do {									\
 	"	" __UA_ADDR "	1b, 4b				\n"	\
 	"	" __UA_ADDR "	2b, 4b				\n"	\
 	"	.previous					\n"	\
-	: "=r" (__gu_err), "=&r" (val)					\
+	: "=r" (__gu_err), "=&r" (__gu_tmp)				\
 	: "0" (0), "r" (addr), "i" (-EFAULT));				\
+	(val) = __gu_tmp;						\
 }
 
 /*
-- 
cgit v1.2.3


From fbb6b3a4ac0ccf12a97c98881d9d873d6dc26fe5 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 10 Feb 2006 14:13:08 +0000
Subject: [MIPS] Get rid of kludgery needed to keep stdargs of old compilers
 working.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Makefile             |  1 -
 include/asm-mips/gcc/sgidefs.h | 17 -----------------
 2 files changed, 18 deletions(-)
 delete mode 100644 include/asm-mips/gcc/sgidefs.h

(limited to 'include')

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 6a57407df1bc..38c0f3360d51 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -94,7 +94,6 @@ endif
 # machines may also.  Since BFD is incredibly buggy with respect to
 # crossformat linking we rely on the elf2ecoff tool for format conversion.
 #
-cflags-y			+= -I $(TOPDIR)/include/asm/gcc
 cflags-y			+= -G 0 -mno-abicalls -fno-pic -pipe
 LDFLAGS_vmlinux			+= -G 0 -static -n -nostdlib
 MODFLAGS			+= -mlong-calls
diff --git a/include/asm-mips/gcc/sgidefs.h b/include/asm-mips/gcc/sgidefs.h
deleted file mode 100644
index 05994371a2af..000000000000
--- a/include/asm-mips/gcc/sgidefs.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * include/sgidefs.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1996 by Ralf Baechle
- *
- * This file is here to satisfy GCC's expectations.
- */
-#ifndef __SGIDEFS_H
-#define __SGIDEFS_H
-
-#include <asm/sgidefs.h>
-
-#endif /* __SGIDEFS_H */
-- 
cgit v1.2.3


From 9cf8ff96447f995d5ea18ec9f25dc8dae26501a2 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Mon, 13 Feb 2006 09:15:49 +0000
Subject: [MIPS] Fix CPU type bitmasks for MIPS III, IV and V.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/asm-mips/cpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-mips/cpu.h b/include/asm-mips/cpu.h
index 934e063e79f1..818b9a97e214 100644
--- a/include/asm-mips/cpu.h
+++ b/include/asm-mips/cpu.h
@@ -204,9 +204,9 @@
  */
 #define MIPS_CPU_ISA_I		0x00000001
 #define MIPS_CPU_ISA_II		0x00000002
-#define MIPS_CPU_ISA_III	0x00000003
-#define MIPS_CPU_ISA_IV		0x00000004
-#define MIPS_CPU_ISA_V		0x00000005
+#define MIPS_CPU_ISA_III	0x00000004
+#define MIPS_CPU_ISA_IV		0x00000008
+#define MIPS_CPU_ISA_V		0x00000010
 #define MIPS_CPU_ISA_M32R1	0x00000020
 #define MIPS_CPU_ISA_M32R2	0x00000040
 #define MIPS_CPU_ISA_M64R1	0x00000080
-- 
cgit v1.2.3


From a6b14fa6fdc01ab3519c2729624f808677539b59 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Tue, 14 Feb 2006 15:01:12 -0800
Subject: [IA64] Count disabled cpus as potential hot-pluggable CPUs

Have a facility to account for potentially hot-pluggable CPUs. ACPI doesn't
give a deterministic method to find hot-pluggable CPUs. Hence we use 2 methods
to assist.

- BIOS can mark potentially hot-pluggable CPUs as disabled in the MADT tables.
- User can specify the number of hot-pluggable CPUs via parameter
  additional_cpus=X

The option is enabled only if CONFIG_ACPI_HOTPLUG_CPU=y, which enables the
physical hotplug option. Without it, the user can still use logical onlining
and offlining of CPUs by enabling CONFIG_HOTPLUG_CPU=y.

Adds more bits to cpu_possible_map for potentially hot-pluggable cpus.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/acpi.c  | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/ia64/kernel/setup.c |  4 ++++
 include/asm-ia64/acpi.h  |  2 ++
 3 files changed, 62 insertions(+)

(limited to 'include')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index d2702c419cf8..34795ede72e0 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -761,6 +761,62 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
 	return (0);
 }
 
+int additional_cpus __initdata = -1;
+
+static __init int setup_additional_cpus(char *s)
+{
+	if (s)
+		additional_cpus = simple_strtol(s, NULL, 0);
+
+	return 0;
+}
+
+early_param("additional_cpus", setup_additional_cpus);
+
+/*
+ * cpu_possible_map should be static, it cannot change as cpu's
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_map on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - The user can overwrite it with additional_cpus=NUM
+ * - Otherwise don't reserve additional CPUs.
+ */
+__init void prefill_possible_map(void)
+{
+	int i;
+	int possible, disabled_cpus;
+
+	disabled_cpus = total_cpus - available_cpus;
+ 	if (additional_cpus == -1) {
+ 		if (disabled_cpus > 0) {
+ 			possible = total_cpus;
+			additional_cpus = disabled_cpus;
+		}
+ 		else {
+			possible = available_cpus;
+			additional_cpus = 0;
+		}
+ 	} else {
+		possible = available_cpus + additional_cpus;
+	}
+	if (possible > NR_CPUS)
+		possible = NR_CPUS;
+
+	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+		possible,
+	        max_t(int, additional_cpus, 0));
+
+	for (i = 0; i < possible; i++)
+		cpu_set(i, cpu_possible_map);
+}
+
 int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 35f7835294a3..3258e09278d0 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -430,6 +430,7 @@ setup_arch (char **cmdline_p)
 	if (early_console_setup(*cmdline_p) == 0)
 		mark_bsp_online();
 
+	parse_early_param();
 #ifdef CONFIG_ACPI
 	/* Initialize the ACPI boot-time table parser */
 	acpi_table_init();
@@ -688,6 +689,9 @@ void
 setup_per_cpu_areas (void)
 {
 	/* start_kernel() requires this... */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+	prefill_possible_map();
+#endif
 }
 
 /*
diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h
index 3a544ffc5008..f7a517654308 100644
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -106,6 +106,8 @@ extern unsigned int can_cpei_retarget(void);
 extern unsigned int is_cpu_cpei_target(unsigned int cpu);
 extern void set_cpei_target_cpu(unsigned int cpu);
 extern unsigned int get_cpei_target_cpu(void);
+extern void prefill_possible_map(void);
+extern int additional_cpus;
 
 #ifdef CONFIG_ACPI_NUMA
 /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/
-- 
cgit v1.2.3


From 7c8903f6373f9abecf060bad53ca36bc4ac037f2 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 14 Feb 2006 13:53:03 -0800
Subject: [PATCH] jbd: revert checkpoint list changes

This patch reverts commit f93ea411b73594f7d144855fd34278bcf34a9afc:
  [PATCH] jbd: split checkpoint lists

This broke journal_flush() for OCFS2, which is its method of being sure
that metadata is sent to disk for another node.

And two related commits 8d3c7fce2d20ecc3264c8d8c91ae3beacdeaed1b and
43c3e6f5abdf6acac9b90c86bf03f995bf7d3d92 with the subjects:
  [PATCH] jbd: log_do_checkpoint fix
  [PATCH] jbd: remove_transaction fix

These seem to be incremental bugfixes on the original patch and as such are
no longer needed.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/checkpoint.c | 418 ++++++++++++++++++++++------------------------------
 fs/jbd/commit.c     |   3 +-
 include/linux/jbd.h |   8 +-
 3 files changed, 179 insertions(+), 250 deletions(-)

(limited to 'include')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e6265a0b56b8..543ed543d1e5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,75 +24,29 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction checkpoint list.
+ * Unlink a buffer from a transaction.
  *
  * Called with j_list_lock held.
  */
 
-static void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
 {
 	transaction_t *transaction;
 
 	transaction = jh->b_cp_transaction;
+	jh->b_cp_transaction = NULL;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh) {
+	if (transaction->t_checkpoint_list == jh)
 		transaction->t_checkpoint_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_list == jh)
-			transaction->t_checkpoint_list = NULL;
-	}
-}
-
-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-
-static inline void __buffer_unlink(struct journal_head *jh)
-{
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-	if (transaction->t_checkpoint_io_list == jh) {
-		transaction->t_checkpoint_io_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_io_list == jh)
-			transaction->t_checkpoint_io_list = NULL;
-	}
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-	__buffer_unlink_first(jh);
-
-	if (!transaction->t_checkpoint_io_list) {
-		jh->b_cpnext = jh->b_cpprev = jh;
-	} else {
-		jh->b_cpnext = transaction->t_checkpoint_io_list;
-		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-		jh->b_cpprev->b_cpnext = jh;
-		jh->b_cpnext->b_cpprev = jh;
-	}
-	transaction->t_checkpoint_io_list = jh;
+	if (transaction->t_checkpoint_list == jh)
+		transaction->t_checkpoint_list = NULL;
 }
 
 /*
  * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it and 2 if we also released the
- * whole transaction.
- *
+ * Returns 1 if we released it.
  * Requires j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
@@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		ret = __journal_remove_checkpoint(jh) + 1;
+		__journal_remove_checkpoint(jh);
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
+		ret = 1;
 	} else {
 		jbd_unlock_bh_state(bh);
 	}
@@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 }
 
 /*
- * Clean up transaction's list of buffers submitted for io.
- * We wait for any pending IO to complete and remove any clean
- * buffers. Note that we take the buffers in the opposite ordering
- * from the one in which they were submitted for IO.
+ * Clean up a transaction's checkpoint list.
+ *
+ * We wait for any pending IO to complete and make sure any clean
+ * buffers are removed from the transaction.
+ *
+ * Return 1 if we performed any actions which might have destroyed the
+ * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
+ * the last checkpoint buffer is cleansed)
  *
  * Called with j_list_lock held.
  */
-
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
 {
-	struct journal_head *jh;
+	struct journal_head *jh, *next_jh, *last_jh;
 	struct buffer_head *bh;
-	tid_t this_tid;
-	int released = 0;
-
-	this_tid = transaction->t_tid;
-restart:
-	/* Didn't somebody clean up the transaction in the meanwhile */
-	if (journal->j_checkpoint_transactions != transaction ||
-		transaction->t_tid != this_tid)
-		return;
-	while (!released && transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
+	int ret = 0;
+
+	assert_spin_locked(&journal->j_list_lock);
+	jh = transaction->t_checkpoint_list;
+	if (!jh)
+		return 0;
+
+	last_jh = jh->b_cpprev;
+	next_jh = jh;
+	do {
+		jh = next_jh;
 		bh = jh2bh(jh);
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		if (buffer_locked(bh)) {
 			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
 			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
+			goto out_return_1;
 		}
+
 		/*
-		 * Now in whatever state the buffer currently is, we know that
-		 * it has been written out and so we can drop it from the list
+		 * This is foul
 		 */
-		released = __journal_remove_checkpoint(jh);
-		jbd_unlock_bh_state(bh);
-	}
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			goto out_return_1;
+		}
+
+		if (jh->b_transaction != NULL) {
+			transaction_t *t = jh->b_transaction;
+			tid_t tid = t->t_tid;
+
+			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
+			log_start_commit(journal, tid);
+			log_wait_commit(journal, tid);
+			goto out_return_1;
+		}
+
+		/*
+		 * AKPM: I think the buffer_jbddirty test is redundant - it
+		 * shouldn't have NULL b_transaction?
+		 */
+		next_jh = jh->b_cpnext;
+		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
+			BUFFER_TRACE(bh, "remove from checkpoint");
+			__journal_remove_checkpoint(jh);
+			jbd_unlock_bh_state(bh);
+			journal_remove_journal_head(bh);
+			__brelse(bh);
+			ret = 1;
+		} else {
+			jbd_unlock_bh_state(bh);
+		}
+	} while (jh != last_jh);
+
+	return ret;
+out_return_1:
+	spin_lock(&journal->j_list_lock);
+	return 1;
 }
 
 #define NR_BATCH	64
@@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
+	spin_unlock(&journal->j_list_lock);
 	ll_rw_block(SWRITE, *batch_count, bhs);
+	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
@@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Return 1 if something happened which requires us to abort the current
  * scan of the checkpoint list.  
  *
- * Called with j_list_lock held and drops it if 1 is returned
+ * Called with j_list_lock held.
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
-static int __process_buffer(journal_t *journal, struct journal_head *jh,
-			struct buffer_head **bhs, int *batch_count)
+static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count,
+			int *drop_count)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
 
-	if (buffer_locked(bh)) {
-		get_bh(bh);
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		wait_on_buffer(bh);
-		/* the journal_head may have gone by now */
-		BUFFER_TRACE(bh, "brelse");
-		put_bh(bh);
-		ret = 1;
-	}
-	else if (jh->b_transaction != NULL) {
-		transaction_t *t = jh->b_transaction;
-		tid_t tid = t->t_tid;
+	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
+		J_ASSERT_JH(jh, jh->b_transaction == NULL);
 
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		log_start_commit(journal, tid);
-		log_wait_commit(journal, tid);
-		ret = 1;
-	}
-	else if (!buffer_dirty(bh)) {
-		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
-		BUFFER_TRACE(bh, "remove from checkpoint");
-		__journal_remove_checkpoint(jh);
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		journal_remove_journal_head(bh);
-		put_bh(bh);
-		ret = 1;
-	}
-	else {
 		/*
 		 * Important: we are about to write the buffer, and
 		 * possibly block, while still holding the journal lock.
@@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
-		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
-			spin_unlock(&journal->j_list_lock);
 			__flush_batch(journal, bhs, batch_count);
 			ret = 1;
 		}
+	} else {
+		int last_buffer = 0;
+		if (jh->b_cpnext == jh) {
+			/* We may be about to drop the transaction.  Tell the
+			 * caller that the lists have changed.
+			 */
+			last_buffer = 1;
+		}
+		if (__try_to_free_cp_buf(jh)) {
+			(*drop_count)++;
+			ret = last_buffer;
+		}
 	}
 	return ret;
 }
 
 /*
- * Perform an actual checkpoint. We take the first transaction on the
- * list of transactions to be checkpointed and send all its buffers
- * to disk. We submit larger chunks of data at once.
+ * Perform an actual checkpoint.  We don't write out only enough to
+ * satisfy the current blocked requests: rather we submit a reasonably
+ * sized chunk of the outstanding data to disk at once for
+ * efficiency.  __log_wait_for_space() will retry if we didn't free enough.
  * 
+ * However, we _do_ take into account the amount requested so that once
+ * the IO has been queued, we can return as soon as enough of it has
+ * completed to disk.
+ *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
 {
-	transaction_t *transaction;
-	tid_t this_tid;
 	int result;
+	int batch_count = 0;
+	struct buffer_head *bhs[NR_BATCH];
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal)
 		return result;
 
 	/*
-	 * OK, we need to start writing disk blocks.  Take one transaction
-	 * and write it.
+	 * OK, we need to start writing disk blocks.  Try to free up a
+	 * quarter of the log in a single checkpoint if we can.
 	 */
-	spin_lock(&journal->j_list_lock);
-	if (!journal->j_checkpoint_transactions)
-		goto out;
-	transaction = journal->j_checkpoint_transactions;
-	this_tid = transaction->t_tid;
-restart:
 	/*
-	 * If someone cleaned up this transaction while we slept, we're
-	 * done (maybe it's a new transaction, but it fell at the same
-	 * address).
+	 * AKPM: check this code.  I had a feeling a while back that it
+	 * degenerates into a busy loop at unmount time.
 	 */
- 	if (journal->j_checkpoint_transactions == transaction &&
-			transaction->t_tid == this_tid) {
-		int batch_count = 0;
-		struct buffer_head *bhs[NR_BATCH];
-		struct journal_head *jh;
-		int retry = 0;
-
-		while (!retry && transaction->t_checkpoint_list) {
+	spin_lock(&journal->j_list_lock);
+	while (journal->j_checkpoint_transactions) {
+		transaction_t *transaction;
+		struct journal_head *jh, *last_jh, *next_jh;
+		int drop_count = 0;
+		int cleanup_ret, retry = 0;
+		tid_t this_tid;
+
+		transaction = journal->j_checkpoint_transactions;
+		this_tid = transaction->t_tid;
+		jh = transaction->t_checkpoint_list;
+		last_jh = jh->b_cpprev;
+		next_jh = jh;
+		do {
 			struct buffer_head *bh;
 
-			jh = transaction->t_checkpoint_list;
+			jh = next_jh;
+			next_jh = jh->b_cpnext;
 			bh = jh2bh(jh);
 			if (!jbd_trylock_bh_state(bh)) {
 				jbd_sync_bh(journal, bh);
+				spin_lock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-			retry = __process_buffer(journal, jh, bhs,
-						&batch_count);
-			if (!retry &&
-			    lock_need_resched(&journal->j_list_lock)) {
-				spin_unlock(&journal->j_list_lock);
+			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+			if (cond_resched_lock(&journal->j_list_lock)) {
 				retry = 1;
 				break;
 			}
-		}
+		} while (jh != last_jh && !retry);
 
 		if (batch_count) {
-			if (!retry) {
-				spin_unlock(&journal->j_list_lock);
-				retry = 1;
-			}
 			__flush_batch(journal, bhs, &batch_count);
+			retry = 1;
 		}
 
-		if (retry) {
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		/*
-		 * Now we have cleaned up the first transaction's checkpoint
-		 * list.  Let's clean up the second one.
+		 * If someone cleaned up this transaction while we slept, we're
+		 * done
+		 */
+		if (journal->j_checkpoint_transactions != transaction)
+			break;
+		if (retry)
+			continue;
+		/*
+		 * Maybe it's a new transaction, but it fell at the same
+		 * address
 		 */
-		__wait_cp_io(journal, transaction);
+		if (transaction->t_tid != this_tid)
+			continue;
+		/*
+		 * We have walked the whole transaction list without
+		 * finding anything to write to disk.  We had better be
+		 * able to make some progress or we are in trouble.
+		 */
+		cleanup_ret = __cleanup_transaction(journal, transaction);
+		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
+		if (journal->j_checkpoint_transactions != transaction)
+			break;
 	}
-out:
 	spin_unlock(&journal->j_list_lock);
 	result = cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
+
 	return 0;
 }
 
@@ -471,53 +455,6 @@ int cleanup_journal_tail(journal_t *journal)
 
 /* Checkpoint list management */
 
-/*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and release them.
- *
- * Called with the journal locked.
- * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
- */
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
-{
-	struct journal_head *last_jh;
-	struct journal_head *next_jh = jh;
-	int ret, freed = 0;
-
-	*released = 0;
-	if (!jh)
-		return 0;
-
- 	last_jh = jh->b_cpprev;
-	do {
-		jh = next_jh;
-		next_jh = jh->b_cpnext;
-		/* Use trylock because of the ranking */
-		if (jbd_trylock_bh_state(jh2bh(jh))) {
-			ret = __try_to_free_cp_buf(jh);
-			if (ret) {
-				freed++;
-				if (ret == 2) {
-					*released = 1;
-					return freed;
-				}
-			}
-		}
-		/*
-		 * This function only frees up some memory if possible so we
-		 * dont have an obligation to finish processing. Bail out if
-		 * preemption requested:
-		 */
-		if (need_resched())
-			return freed;
-	} while (jh != last_jh);
-
-	return freed;
-}
-
 /*
  * journal_clean_checkpoint_list
  *
@@ -525,38 +462,46 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns number of bufers reaped (for debug)
  */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret = 0, released;
+	int ret = 0;
 
 	transaction = journal->j_checkpoint_transactions;
-	if (!transaction)
+	if (transaction == 0)
 		goto out;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
+		struct journal_head *jh;
+
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_list, &released);
-		if (need_resched())
-			goto out;
-		if (released)
-			continue;
-		/*
-		 * It is essential that we are as careful as in the case of
-		 * t_checkpoint_list with removing the buffer from the list as
-		 * we can possibly see not yet submitted buffers on io_list
-		 */
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, &released);
-		if (need_resched())
-			goto out;
+		jh = transaction->t_checkpoint_list;
+		if (jh) {
+			struct journal_head *last_jh = jh->b_cpprev;
+			struct journal_head *next_jh = jh;
+
+			do {
+				jh = next_jh;
+				next_jh = jh->b_cpnext;
+				/* Use trylock because of the ranknig */
+				if (jbd_trylock_bh_state(jh2bh(jh)))
+					ret += __try_to_free_cp_buf(jh);
+				/*
+				 * This function only frees up some memory
+				 * if possible so we dont have an obligation
+				 * to finish processing. Bail out if preemption
+				 * requested:
+				 */
+				if (need_resched())
+					goto out;
+			} while (jh != last_jh);
+		}
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -571,22 +516,18 @@ out:
  * buffer updates committed in that transaction have safely been stored
  * elsewhere on disk.  To achieve this, all of the buffers in a
  * transaction need to be maintained on the transaction's checkpoint
- * lists until they have been rewritten, at which point this function is
+ * list until they have been rewritten, at which point this function is
  * called to remove the buffer from the existing transaction's
- * checkpoint lists.
- *
- * The function returns 1 if it frees the transaction, 0 otherwise.
+ * checkpoint list.
  *
  * This function is called with the journal locked.
  * This function is called with j_list_lock held.
- * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
 
-int __journal_remove_checkpoint(struct journal_head *jh)
+void __journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
-	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
-	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL ||
-	    transaction->t_checkpoint_io_list != NULL)
+	if (transaction->t_checkpoint_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	/* Just in case anybody was waiting for more transactions to be
            checkpointed... */
 	wake_up(&journal->j_wait_logspace);
-	ret = 1;
 out:
 	JBUFFER_TRACE(jh, "exit");
-	return ret;
 }
 
 /*
@@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
-	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 29e62d98bae6..002ad2bbc769 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,8 +829,7 @@ restart_loop:
 	journal->j_committing_transaction = NULL;
 	spin_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL &&
-	    commit_transaction->t_checkpoint_io_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL) {
 		__journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 0fe4aa891ddc..41ee79962bb2 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -497,12 +497,6 @@ struct transaction_s
 	 */
 	struct journal_head	*t_checkpoint_list;
 
-	/*
-	 * Doubly-linked circular list of all buffers submitted for IO while
-	 * checkpointing. [j_list_lock]
-	 */
-	struct journal_head	*t_checkpoint_io_list;
-
 	/*
 	 * Doubly-linked circular list of temporary buffers currently undergoing
 	 * IO in the log [j_list_lock]
@@ -852,7 +846,7 @@ extern void journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
 int __journal_clean_checkpoint_list(journal_t *journal);
-int __journal_remove_checkpoint(struct journal_head *);
+void __journal_remove_checkpoint(struct journal_head *);
 void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 /* Buffer IO */
-- 
cgit v1.2.3


From 5ac5f9d1ce8492163dbde5d357dc5d03becf7e36 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 14 Feb 2006 13:53:04 -0800
Subject: [PATCH] NLM: Fix the NLM_GRANTED callback checks

If 2 threads attached to the same process are blocking on different locks on
different files (maybe even on different servers) but have the same lock
arguments (i.e.  same offset+length - actually quite common, since most
processes try to lock the entire file) then the first GRANTED call that wakes
one up will also wake the other.

Currently when the NLM_GRANTED callback comes in, lockd walks the list of
blocked locks in search of a match to the lock that the NLM server has
granted.  Although it checks the lock pid, start and end, it fails to check
the filehandle and the server address.

By checking the filehandle and server IP address, we ensure that this only
happens if the locks truly are referencing the same file.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/clntlock.c         | 27 +++++++++++++++++----------
 fs/lockd/svc4proc.c         |  2 +-
 fs/lockd/svcproc.c          |  2 +-
 include/linux/lockd/lockd.h |  6 +++---
 4 files changed, 22 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 3eaf6e701087..da6354baa0b8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
 /*
  * The server lockd has called us back to tell us the lock was granted
  */
-u32
-nlmclnt_grant(struct nlm_lock *lock)
+u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
 {
+	const struct file_lock *fl = &lock->fl;
+	const struct nfs_fh *fh = &lock->fh;
 	struct nlm_wait	*block;
 	u32 res = nlm_lck_denied;
 
@@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock)
 	 * Warning: must not use cookie to match it!
 	 */
 	list_for_each_entry(block, &nlm_blocked, b_list) {
-		if (nlm_compare_locks(block->b_lock, &lock->fl)) {
-			/* Alright, we found a lock. Set the return status
-			 * and wake up the caller
-			 */
-			block->b_status = NLM_LCK_GRANTED;
-			wake_up(&block->b_wait);
-			res = nlm_granted;
-		}
+		struct file_lock *fl_blocked = block->b_lock;
+
+		if (!nlm_compare_locks(fl_blocked, fl))
+			continue;
+		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
+			continue;
+		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
+			continue;
+		/* Alright, we found a lock. Set the return status
+		 * and wake up the caller
+		 */
+		block->b_status = NLM_LCK_GRANTED;
+		wake_up(&block->b_wait);
+		res = nlm_granted;
 	}
 	return res;
 }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4063095d849e..b10f913aa06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
-	resp->status = nlmclnt_grant(&argp->lock);
+	resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
 	dprintk("lockd: GRANTED       status %d\n", ntohl(resp->status));
 	return rpc_success;
 }
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3bc437e0cf5b..35681d9cf1fc 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
-	resp->status = nlmclnt_grant(&argp->lock);
+	resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
 	dprintk("lockd: GRANTED       status %d\n", ntohl(resp->status));
 	return rpc_success;
 }
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 920766cea79c..ef21ed296039 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -149,7 +149,7 @@ struct nlm_rqst * nlmclnt_alloc_call(void);
 int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
 void		  nlmclnt_finish_block(struct nlm_rqst *req);
 long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
-u32		  nlmclnt_grant(struct nlm_lock *);
+u32		  nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
 int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
 int		  nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
@@ -204,7 +204,7 @@ nlmsvc_file_inode(struct nlm_file *file)
  * Compare two host addresses (needs modifying for ipv6)
  */
 static __inline__ int
-nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2)
+nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2)
 {
 	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
 }
@@ -214,7 +214,7 @@ nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2)
  * When the second lock is of type F_UNLCK, this acts like a wildcard.
  */
 static __inline__ int
-nlm_compare_locks(struct file_lock *fl1, struct file_lock *fl2)
+nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2)
 {
 	return	fl1->fl_pid   == fl2->fl_pid
 	     && fl1->fl_start == fl2->fl_start
-- 
cgit v1.2.3


From 8b09fb34513225d87d511c7e8f29c0fd3cf860e0 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Tue, 14 Feb 2006 13:53:05 -0800
Subject: [PATCH] fix x86 topology export in sysfs for subarchitectures

The correct way to export hyperthreading based functions is to predicate
them on CONFIG_X86_HT.  Without this, the topology exporting patch breaks
the build on all non-PC x86 subarchitectures.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index af503a122b23..aa958c6ee83e 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -27,7 +27,7 @@
 #ifndef _ASM_I386_TOPOLOGY_H
 #define _ASM_I386_TOPOLOGY_H
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_HT
 #define topology_physical_package_id(cpu)				\
 	(phys_proc_id[cpu] == BAD_APICID ? -1 : phys_proc_id[cpu])
 #define topology_core_id(cpu)						\
-- 
cgit v1.2.3


From f822566165dd46ff5de9bf895cfa6c51f53bb0c4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Tue, 14 Feb 2006 13:53:08 -0800
Subject: [PATCH] madvise MADV_DONTFORK/MADV_DOFORK

Currently, copy-on-write may change the physical address of a page even if the
user requested that the page is pinned in memory (either by mlock or by
get_user_pages).  This happens if the process forks meanwhile, and the parent
writes to that page.  As a result, the page is orphaned: in case of
get_user_pages, the application will never see any data hardware DMA's into
this page after the COW.  In case of mlock'd memory, the parent is not getting
the realtime/security benefits of mlock.

In particular, this affects the Infiniband modules which do DMA from and into
user pages all the time.

This patch adds madvise options to control whether memory range is inherited
across fork.  Useful e.g.  for when hardware is doing DMA from/into these
pages.  Could also be useful to an application wanting to speed up its forks
by cutting large areas out of consideration.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Acked-by: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-alpha/mman.h   |  2 ++
 include/asm-arm/mman.h     |  2 ++
 include/asm-arm26/mman.h   |  2 ++
 include/asm-cris/mman.h    |  2 ++
 include/asm-frv/mman.h     |  2 ++
 include/asm-h8300/mman.h   |  2 ++
 include/asm-i386/mman.h    |  2 ++
 include/asm-ia64/mman.h    |  2 ++
 include/asm-m32r/mman.h    |  2 ++
 include/asm-m68k/mman.h    |  2 ++
 include/asm-mips/mman.h    |  2 ++
 include/asm-parisc/mman.h  |  2 ++
 include/asm-powerpc/mman.h |  2 ++
 include/asm-s390/mman.h    |  2 ++
 include/asm-sh/mman.h      |  2 ++
 include/asm-sparc/mman.h   |  2 ++
 include/asm-sparc64/mman.h |  2 ++
 include/asm-v850/mman.h    |  2 ++
 include/asm-x86_64/mman.h  |  2 ++
 include/asm-xtensa/mman.h  |  2 ++
 mm/madvise.c               | 21 +++++++++++++++++----
 21 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h
index f6439532a262..a21515c16a43 100644
--- a/include/asm-alpha/mman.h
+++ b/include/asm-alpha/mman.h
@@ -43,6 +43,8 @@
 #define	MADV_SPACEAVAIL	5		/* ensure resources are available */
 #define MADV_DONTNEED	6		/* don't need these pages */
 #define MADV_REMOVE	7		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h
index f0bebca2ac21..693ed859e632 100644
--- a/include/asm-arm/mman.h
+++ b/include/asm-arm/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h
index 0ed7780541fa..2096c50df888 100644
--- a/include/asm-arm26/mman.h
+++ b/include/asm-arm26/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h
index 5a382b8bf3f7..deddfb239ff5 100644
--- a/include/asm-cris/mman.h
+++ b/include/asm-cris/mman.h
@@ -38,6 +38,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h
index 8af4a41c255e..d3bca306da82 100644
--- a/include/asm-frv/mman.h
+++ b/include/asm-frv/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h
index 744a8fb485c2..ac0346f7d11d 100644
--- a/include/asm-h8300/mman.h
+++ b/include/asm-h8300/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h
index ba4941e6f643..ab2339a1d807 100644
--- a/include/asm-i386/mman.h
+++ b/include/asm-i386/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h
index 828beb24a20e..357ebb780cc0 100644
--- a/include/asm-ia64/mman.h
+++ b/include/asm-ia64/mman.h
@@ -44,6 +44,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h
index 12e29747bc84..6b02fe3fcff2 100644
--- a/include/asm-m32r/mman.h
+++ b/include/asm-m32r/mman.h
@@ -38,6 +38,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h
index ea262ab88b3b..efd12bc4ccb7 100644
--- a/include/asm-m68k/mman.h
+++ b/include/asm-m68k/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h
index dd17c8bd62a1..6d01e26830fa 100644
--- a/include/asm-mips/mman.h
+++ b/include/asm-mips/mman.h
@@ -66,6 +66,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON       MAP_ANONYMOUS
diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h
index 736b0abcac05..a381cf5c8f55 100644
--- a/include/asm-parisc/mman.h
+++ b/include/asm-parisc/mman.h
@@ -49,6 +49,8 @@
 #define MADV_4M_PAGES   22              /* Use 4 Megabyte pages */
 #define MADV_16M_PAGES  24              /* Use 16 Megabyte pages */
 #define MADV_64M_PAGES  26              /* Use 64 Megabyte pages */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h
index a2e34c21b44f..fcff25d13f13 100644
--- a/include/asm-powerpc/mman.h
+++ b/include/asm-powerpc/mman.h
@@ -45,6 +45,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h
index c8d5409b5d56..d41ca1477010 100644
--- a/include/asm-s390/mman.h
+++ b/include/asm-s390/mman.h
@@ -44,6 +44,8 @@
 #define MADV_WILLNEED  0x3              /* pre-fault pages */
 #define MADV_DONTNEED  0x4              /* discard these pages */
 #define MADV_REMOVE    0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h
index 693bd55a3710..0e08d0573abc 100644
--- a/include/asm-sh/mman.h
+++ b/include/asm-sh/mman.h
@@ -36,6 +36,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h
index 98435ad8619e..4a298b2be859 100644
--- a/include/asm-sparc/mman.h
+++ b/include/asm-sparc/mman.h
@@ -55,6 +55,8 @@
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
 #define MADV_REMOVE	0x6		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h
index cb4b6156194d..d705ec92da8b 100644
--- a/include/asm-sparc64/mman.h
+++ b/include/asm-sparc64/mman.h
@@ -55,6 +55,8 @@
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
 #define MADV_REMOVE	0x6		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h
index edc79965193a..7b851c310e41 100644
--- a/include/asm-v850/mman.h
+++ b/include/asm-v850/mman.h
@@ -33,6 +33,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h
index d0e97b74f735..b699a38c1c3c 100644
--- a/include/asm-x86_64/mman.h
+++ b/include/asm-x86_64/mman.h
@@ -37,6 +37,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h
index 082a7504925e..e2d7afb679c8 100644
--- a/include/asm-xtensa/mman.h
+++ b/include/asm-xtensa/mman.h
@@ -73,6 +73,8 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_REMOVE	0x5		/* remove these pages & resources */
+#define MADV_DONTFORK	0x30		/* dont inherit across fork */
+#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON       MAP_ANONYMOUS
diff --git a/mm/madvise.c b/mm/madvise.c
index ae0ae3ea299a..af3d573b0141 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -22,16 +22,23 @@ static long madvise_behavior(struct vm_area_struct * vma,
 	struct mm_struct * mm = vma->vm_mm;
 	int error = 0;
 	pgoff_t pgoff;
-	int new_flags = vma->vm_flags & ~VM_READHINTMASK;
+	int new_flags = vma->vm_flags;
 
 	switch (behavior) {
+	case MADV_NORMAL:
+		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
+		break;
 	case MADV_SEQUENTIAL:
-		new_flags |= VM_SEQ_READ;
+		new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
 		break;
 	case MADV_RANDOM:
-		new_flags |= VM_RAND_READ;
+		new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
 		break;
-	default:
+	case MADV_DONTFORK:
+		new_flags |= VM_DONTCOPY;
+		break;
+	case MADV_DOFORK:
+		new_flags &= ~VM_DONTCOPY;
 		break;
 	}
 
@@ -177,6 +184,12 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	long error;
 
 	switch (behavior) {
+	case MADV_DOFORK:
+		if (vma->vm_flags & VM_IO) {
+			error = -EINVAL;
+			break;
+		}
+	case MADV_DONTFORK:
 	case MADV_NORMAL:
 	case MADV_SEQUENTIAL:
 	case MADV_RANDOM:
-- 
cgit v1.2.3


From d6077cb80cde4506720f9165eba99ee07438513f Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Tue, 14 Feb 2006 13:53:10 -0800
Subject: [PATCH] sched: revert "filter affine wakeups"

Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6:

    [PATCH] sched: filter affine wakeups

Apparently caused more than 10% performance regression for aim7 benchmark.
The setup in use is 16-cpu HP rx8620, 64Gb of memory and 12 MSA1000s with 144
disks.  Each disk is 72Gb with a single ext3 filesystem (courtesy of HP, who
supplied benchmark results).

The problem is, for aim7, the wake-up pattern is random, but it still needs
load balancing action in the wake-up path to achieve best performance.  With
the above commit, lack of load balancing hurts that workload.

However, for workloads like database transaction processing, the requirement
is exactly opposite.  In the wake up path, best performance is achieved with
absolutely zero load balancing.  We simply wake up the process on the CPU that
it was previously run on.  Worst performance is obtained when we do load
balancing at wake up.

There isn't an easy way to auto detect the workload characteristics.  Ingo's
earlier patch that detects idle CPUs and decides whether to load balance or not
doesn't perform with aim7 either since all CPUs are busy (it causes even
bigger perf.  regression).

Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6, which causes more
than 10% performance regression with aim7.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  5 +----
 kernel/sched.c        | 10 +---------
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c1da0269a18..b6f51e3a38ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -697,11 +697,8 @@ struct task_struct {
 
 	int lock_depth;		/* BKL lock depth */
 
-#if defined(CONFIG_SMP)
-	int last_waker_cpu;	/* CPU that last woke this task up */
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	int oncpu;
-#endif
 #endif
 	int prio, static_prio;
 	struct list_head run_list;
diff --git a/kernel/sched.c b/kernel/sched.c
index 87d93be336a1..66d957227de9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1204,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 		}
 	}
 
-	if (p->last_waker_cpu != this_cpu)
-		goto out_set_cpu;
-
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1277,8 +1274,6 @@ out_set_cpu:
 		cpu = task_cpu(p);
 	}
 
-	p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1360,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-	p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
-- 
cgit v1.2.3


From 68f624fc8b9fa50de9cc0ebd612ef7b7b9fa32d0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 14 Feb 2006 13:53:18 -0800
Subject: [PATCH] FRV: Miscellaneous fixes

Make various alterations and fixes to the FRV arch:

 (1) Resyncs the FRV system call collection with the i386 arch.

 (2) Discards __iounmap() as it's not used.

 (3) Fixes the use of the SWAP/SWAPI instruction to get the arguments the right
     way around in atomic.h, and also to get the asm constraints correct.

 (4) Moves copy_to/from_user_page() to asm/cacheflush.h to be consistent with
     other archs.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/frv/kernel/entry.S      | 26 +++++++++++++++++++++-----
 arch/frv/mm/kmap.c           |  9 ---------
 include/asm-frv/atomic.h     |  6 +++---
 include/asm-frv/cacheflush.h | 12 ++++++++++++
 include/asm-frv/io.h         |  1 -
 include/asm-frv/uaccess.h    |  3 ---
 include/asm-frv/unistd.h     | 28 ++++++++++++++++++++++------
 7 files changed, 58 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 5f6548388b74..c69d499d28cf 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1418,11 +1418,27 @@ sys_call_table:
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
-	.long sys_ni_syscall // sys_vperfctr_open
-	.long sys_ni_syscall // sys_vperfctr_control	/* 290 */
-	.long sys_ni_syscall // sys_vperfctr_unlink
-	.long sys_ni_syscall // sys_vperfctr_iresume
-	.long sys_ni_syscall // sys_vperfctr_read
+	.long sys_ioprio_set
+	.long sys_ioprio_get		/* 290 */
+	.long sys_inotify_init
+	.long sys_inotify_add_watch
+	.long sys_inotify_rm_watch
+	.long sys_migrate_pages
+	.long sys_openat		/* 295 */
+	.long sys_mkdirat
+	.long sys_mknodat
+	.long sys_fchownat
+	.long sys_futimesat
+	.long sys_newfstatat		/* 300 */
+	.long sys_unlinkat
+	.long sys_renameat
+	.long sys_linkat
+	.long sys_symlinkat
+	.long sys_readlinkat		/* 305 */
+	.long sys_fchmodat
+	.long sys_faccessat
+	.long sys_pselect6
+	.long sys_ppoll
 
 
 syscall_table_size = (. - sys_call_table)
diff --git a/arch/frv/mm/kmap.c b/arch/frv/mm/kmap.c
index 539f45e6d15e..c54f18e65ea6 100644
--- a/arch/frv/mm/kmap.c
+++ b/arch/frv/mm/kmap.c
@@ -43,15 +43,6 @@ void iounmap(void *addr)
 {
 }
 
-/*
- * __iounmap unmaps nearly everything, so be careful
- * it doesn't free currently pointer/page tables anymore but it
- * wans't used anyway and might be added later.
- */
-void __iounmap(void *addr, unsigned long size)
-{
-}
-
 /*
  * Set new cache mode for some kernel address space.
  * The caller must push data for that range itself, if such data may already
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index a59f684b4f33..5d9f84bfdcad 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -220,9 +220,9 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
 	switch (sizeof(__xg_orig)) {						\
 	case 4:									\
 		asm volatile(							\
-			"swap%I0 %2,%M0"					\
-			: "+m"(*__xg_ptr), "=&r"(__xg_orig)			\
-			: "r"(x)						\
+			"swap%I0 %M0,%1"					\
+			: "+m"(*__xg_ptr), "=r"(__xg_orig)			\
+			: "1"(x)						\
 			: "memory"						\
 			);							\
 		break;								\
diff --git a/include/asm-frv/cacheflush.h b/include/asm-frv/cacheflush.h
index 3007deccb490..eaa5826bc1c8 100644
--- a/include/asm-frv/cacheflush.h
+++ b/include/asm-frv/cacheflush.h
@@ -87,5 +87,17 @@ static inline void flush_icache_page(struct vm_area_struct *vma, struct page *pa
 	flush_icache_user_range(vma, page, page_to_phys(page), PAGE_SIZE);
 }
 
+/*
+ * permit ptrace to access another process's address space through the icache
+ * and the dcache
+ */
+#define copy_to_user_page(vma, page, vaddr, dst, src, len)	\
+do {								\
+	memcpy((dst), (src), (len));				\
+	flush_icache_user_range((vma), (page), (vaddr), (len));	\
+} while(0)
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
+	memcpy((dst), (src), (len))
 
 #endif /* _ASM_CACHEFLUSH_H */
diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h
index 075369b1a34b..01247cb2bc39 100644
--- a/include/asm-frv/io.h
+++ b/include/asm-frv/io.h
@@ -251,7 +251,6 @@ static inline void writel(uint32_t datum, volatile void __iomem *addr)
 #define IOMAP_WRITETHROUGH		3
 
 extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag);
-extern void __iounmap(void __iomem *addr, unsigned long size);
 
 static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size)
 {
diff --git a/include/asm-frv/uaccess.h b/include/asm-frv/uaccess.h
index b6bcbe01f6ee..a1d140438863 100644
--- a/include/asm-frv/uaccess.h
+++ b/include/asm-frv/uaccess.h
@@ -306,7 +306,4 @@ extern long strnlen_user(const char *src, long count);
 
 extern unsigned long search_exception_table(unsigned long addr);
 
-#define copy_to_user_page(vma, page, vaddr, dst, src, len)	memcpy(dst, src, len)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len)	memcpy(dst, src, len)
-
 #endif /* _ASM_UACCESS_H */
diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h
index 4d994d2e99e3..322531caa484 100644
--- a/include/asm-frv/unistd.h
+++ b/include/asm-frv/unistd.h
@@ -295,13 +295,29 @@
 #define __NR_add_key		286
 #define __NR_request_key	287
 #define __NR_keyctl		288
-#define __NR_vperfctr_open	289
-#define __NR_vperfctr_control	(__NR_perfctr_info+1)
-#define __NR_vperfctr_unlink	(__NR_perfctr_info+2)
-#define __NR_vperfctr_iresume	(__NR_perfctr_info+3)
-#define __NR_vperfctr_read	(__NR_perfctr_info+4)
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#define __NR_inotify_init	291
+#define __NR_inotify_add_watch	292
+#define __NR_inotify_rm_watch	293
+#define __NR_migrate_pages	294
+#define __NR_openat		295
+#define __NR_mkdirat		296
+#define __NR_mknodat		297
+#define __NR_fchownat		298
+#define __NR_futimesat		299
+#define __NR_newfstatat		300
+#define __NR_unlinkat		301
+#define __NR_renameat		302
+#define __NR_linkat		303
+#define __NR_symlinkat		304
+#define __NR_readlinkat		305
+#define __NR_fchmodat		306
+#define __NR_faccessat		307
+#define __NR_pselect6		308
+#define __NR_ppoll		309
 
-#define NR_syscalls 294
+#define NR_syscalls 310
 
 /*
  * process the return value of a syscall, consigning it to one of two possible fates
-- 
cgit v1.2.3


From 28baebae73c3ea8b75c7cae225a7db817ab825a9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 14 Feb 2006 13:53:20 -0800
Subject: [PATCH] FRV: Use virtual interrupt disablement

Make the FRV arch use virtual interrupt disablement because accesses to the
processor status register (PSR) are relatively slow and because we will
soon have the need to deal with multiple interrupt controls at the same
time (separate h/w and inter-core interrupts).

The way this is done is to dedicate one of the four integer condition code
registers (ICC2) to maintaining a virtual interrupt disablement state
whilst inside the kernel.  This uses the ICC2.Z flag (Zero) to indicate
whether the interrupts are virtually disabled and the ICC2.C flag (Carry)
to indicate whether the interrupts are physically disabled.

ICC2.Z is set to indicate interrupts are virtually disabled.  ICC2.C is set
to indicate interrupts are physically enabled.  Under normal running
conditions Z==0 and C==1.

Disabling interrupts with local_irq_disable() doesn't then actually
physically disable interrupts - it merely sets ICC2.Z to 1.  Should an
interrupt then happen, the exception prologue will note ICC2.Z is set and
branch out of line using one instruction (an unlikely BEQ).  Here it will
physically disable interrupts and clear ICC2.C.

When it comes time to enable interrupts (local_irq_enable()), this simply
clears the ICC2.Z flag and invokes a trap #2 if both Z and C flags are
clear (the HI integer condition).  This can be done with the TIHI
conditional trap instruction.

The trap then physically reenables interrupts and sets ICC2.C again.  Upon
returning the interrupt will be taken as interrupts will then be enabled.
Note that whilst processing the trap, the whole exceptions system is
disabled, and so an interrupt can't happen till it returns.

If no pending interrupt had happened, ICC2.C would still be set, the HI
condition would not be fulfilled, and no trap will happen.

Saving interrupts (local_irq_save) is simply a matter of pulling the ICC2.Z
flag out of the CCR register, shifting it down and masking it off.  This
gives a result of 0 if interrupts were enabled and 1 if they weren't.

Restoring interrupts (local_irq_restore) is then a matter of taking the
saved value mentioned previously and XOR'ing it against 1.  If it was one,
the result will be zero, and if it was zero the result will be non-zero.
This result is then used to affect the ICC2.Z flag directly (it is a
condition code flag after all).  An XOR instruction does not affect the
Carry flag, and so that bit of state is unchanged.  The two flags can then
be sampled to see if they're both zero using the trap (TIHI) as for the
unconditional reenablement (local_irq_enable).

This patch also:

 (1) Modifies the debugging stub (break.S) to handle single-stepping crossing
     into the trap #2 handler and into virtually disabled interrupts.

 (2) Removes superseded fixup pointers from the second instructions in the trap
     tables (there's now a separate fixup table for this).

 (3) Declares the trap #3 vector for use in .org directives in the trap table.

 (4) Moves irq_enter() and irq_exit() in do_IRQ() to avoid problems with
     virtual interrupt handling, and removes the duplicate code that has now
     been folded into irq_exit() (softirq and preemption handling).

 (5) Tells the compiler in the arch Makefile that ICC2 is now reserved.

 (6) Documents the in-kernel ABI, including the virtual interrupts.

 (7) Renames the old irq management functions to different names.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/fujitsu/frv/kernel-ABI.txt | 234 +++++++++++++++++++++++++++++++
 arch/frv/Makefile                        |   2 +-
 arch/frv/kernel/break.S                  |  77 +++++++++-
 arch/frv/kernel/entry-table.S            |  39 +++++-
 arch/frv/kernel/entry.S                  |  65 ++++++++-
 arch/frv/kernel/head.S                   |   3 +
 arch/frv/kernel/irq.c                    |  41 +-----
 include/asm-frv/spr-regs.h               |   1 +
 include/asm-frv/system.h                 |  88 +++++++++++-
 9 files changed, 489 insertions(+), 61 deletions(-)
 create mode 100644 Documentation/fujitsu/frv/kernel-ABI.txt

(limited to 'include')

diff --git a/Documentation/fujitsu/frv/kernel-ABI.txt b/Documentation/fujitsu/frv/kernel-ABI.txt
new file mode 100644
index 000000000000..0ed9b0a779bc
--- /dev/null
+++ b/Documentation/fujitsu/frv/kernel-ABI.txt
@@ -0,0 +1,234 @@
+				 =================================
+				 INTERNAL KERNEL ABI FOR FR-V ARCH
+				 =================================
+
+The internal FRV kernel ABI is not quite the same as the userspace ABI. A number of the registers
+are used for special purposes, and the ABI is not consistent between modules vs core, and MMU vs
+no-MMU.
+
+This partly stems from the fact that FRV CPUs do not have a separate supervisor stack pointer, and
+most of them do not have any scratch registers, thus requiring at least one general purpose
+register to be clobbered in such an event. Also, within the kernel core, it is possible to simply
+jump or call directly between functions using a relative offset. This cannot be extended to modules
+for the displacement is likely to be too far. Thus in modules the address of a function to call
+must be calculated in a register and then used, requiring two extra instructions.
+
+This document has the following sections:
+
+ (*) System call register ABI
+ (*) CPU operating modes
+ (*) Internal kernel-mode register ABI
+ (*) Internal debug-mode register ABI
+ (*) Virtual interrupt handling
+
+
+========================
+SYSTEM CALL REGISTER ABI
+========================
+
+When a system call is made, the following registers are effective:
+
+	REGISTERS	CALL			RETURN
+	===============	=======================	=======================
+	GR7		System call number	Preserved
+	GR8		Syscall arg #1		Return value
+	GR9-GR13	Syscall arg #2-6	Preserved
+
+
+===================
+CPU OPERATING MODES
+===================
+
+The FR-V CPU has three basic operating modes. In order of increasing capability:
+
+  (1) User mode.
+
+      Basic userspace running mode.
+
+  (2) Kernel mode.
+
+      Normal kernel mode. There are many additional control registers available that may be
+      accessed in this mode, in addition to all the stuff available to user mode. This has two
+      submodes:
+
+      (a) Exceptions enabled (PSR.T == 1).
+
+      	  Exceptions will invoke the appropriate normal kernel mode handler. On entry to the
+      	  handler, the PSR.T bit will be cleared.
+
+      (b) Exceptions disabled (PSR.T == 0).
+
+      	  No exceptions or interrupts may happen. Any mandatory exceptions will cause the CPU to
+      	  halt unless the CPU is told to jump into debug mode instead.
+
+  (3) Debug mode.
+
+      No exceptions may happen in this mode. Memory protection and management exceptions will be
+      flagged for later consideration, but the exception handler won't be invoked. Debugging traps
+      such as hardware breakpoints and watchpoints will be ignored. This mode is entered only by
+      debugging events obtained from the other two modes.
+
+      All kernel mode registers may be accessed, plus a few extra debugging specific registers.
+
+
+=================================
+INTERNAL KERNEL-MODE REGISTER ABI
+=================================
+
+There are a number of permanent register assignments that are set up by entry.S in the exception
+prologue. Note that there is a complete set of exception prologues for each of user->kernel
+transition and kernel->kernel transition. There are also user->debug and kernel->debug mode
+transition prologues.
+
+
+	REGISTER	FLAVOUR	USE
+	===============	=======	====================================================
+	GR1			Supervisor stack pointer
+	GR15			Current thread info pointer
+	GR16			GP-Rel base register for small data
+	GR28			Current exception frame pointer (__frame)
+	GR29			Current task pointer (current)
+	GR30			Destroyed by kernel mode entry
+	GR31		NOMMU	Destroyed by debug mode entry
+	GR31		MMU	Destroyed by TLB miss kernel mode entry
+	CCR.ICC2		Virtual interrupt disablement tracking
+	CCCR.CC3		Cleared by exception prologue (atomic op emulation)
+	SCR0		MMU	See mmu-layout.txt.
+	SCR1		MMU	See mmu-layout.txt.
+	SCR2		MMU	Save for EAR0 (destroyed by icache insns in debug mode)
+	SCR3		MMU	Save for GR31 during debug exceptions
+	DAMR/IAMR	NOMMU	Fixed memory protection layout.
+	DAMR/IAMR	MMU	See mmu-layout.txt.
+
+
+Certain registers are also used or modified across function calls:
+
+	REGISTER	CALL				RETURN
+	===============	===============================	===============================
+	GR0		Fixed Zero			-
+	GR2		Function call frame pointer
+	GR3		Special				Preserved
+	GR3-GR7		-				Clobbered
+	GR8		Function call arg #1		Return value (or clobbered)
+	GR9		Function call arg #2		Return value MSW (or clobbered)
+	GR10-GR13	Function call arg #3-#6		Clobbered
+	GR14		-				Clobbered
+	GR15-GR16	Special				Preserved
+	GR17-GR27	-				Preserved
+	GR28-GR31	Special				Only accessed explicitly
+	LR		Return address after CALL	Clobbered
+	CCR/CCCR	-				Mostly Clobbered
+
+
+================================
+INTERNAL DEBUG-MODE REGISTER ABI
+================================
+
+This is the same as the kernel-mode register ABI for function calls. The difference is that in
+debug-mode there's a different stack and a different exception frame. Almost all the global
+registers from kernel-mode (including the stack pointer) may be changed.
+
+	REGISTER	FLAVOUR	USE
+	===============	=======	====================================================
+	GR1			Debug stack pointer
+	GR16			GP-Rel base register for small data
+	GR31			Current debug exception frame pointer (__debug_frame)
+	SCR3		MMU	Saved value of GR31
+
+
+Note that debug mode is able to interfere with the kernel's emulated atomic ops, so it must be
+exceedingly careful not to do any that would interact with the main kernel in this regard. Hence
+the debug mode code (gdbstub) is almost completely self-contained. The only external code used is
+the sprintf family of functions.
+
+Furthermore, break.S is so complicated because single-step mode does not switch off on entry to an
+exception. That means unless manually disabled, single-stepping will blithely go on stepping into
+things like interrupts. See gdbstub.txt for more information.
+
+
+==========================
+VIRTUAL INTERRUPT HANDLING
+==========================
+
+Because access to the PSR is so slow, and to disable interrupts we have to access it twice (once
+to read and once to write), we don't actually disable interrupts at all if we don't have to. What
+we do instead is use the ICC2 condition code flags to note virtual disablement, such that if we
+then do take an interrupt, we note the flag, really disable interrupts, set another flag and resume
+execution at the point the interrupt happened. Setting condition flags as a side effect of an
+arithmetic or logical instruction is really fast. This use of the ICC2 only occurs within the
+kernel - it does not affect userspace.
+
+The flags we use are:
+
+ (*) CCR.ICC2.Z [Zero flag]
+
+     Set to virtually disable interrupts, clear when interrupts are virtually enabled. Can be
+     modified by logical instructions without affecting the Carry flag.
+
+ (*) CCR.ICC2.C [Carry flag]
+
+     Clear to indicate hardware interrupts are really disabled, set otherwise.
+
+
+What happens is this:
+
+ (1) Normal kernel-mode operation.
+
+	ICC2.Z is 0, ICC2.C is 1.
+
+ (2) An interrupt occurs. The exception prologue examines ICC2.Z and determines that nothing needs
+     doing. This is done simply with an unlikely BEQ instruction.
+
+ (3) The interrupts are disabled (local_irq_disable)
+
+	ICC2.Z is set to 1.
+
+ (4) If interrupts were then re-enabled (local_irq_enable):
+
+	ICC2.Z would be set to 0.
+
+     A TIHI #2 instruction (trap #2 if condition HI - Z==0 && C==0) would be used to trap if
+     interrupts were now virtually enabled, but physically disabled - which they're not, so the
+     trap isn't taken. The kernel would then be back to state (1).
+
+ (5) An interrupt occurs. The exception prologue examines ICC2.Z and determines that the interrupt
+     shouldn't actually have happened. It jumps aside, and there disables interrupts by setting
+     PSR.PIL to 14 and then it clears ICC2.C.
+
+ (6) If interrupts were then saved and disabled again (local_irq_save):
+
+	ICC2.Z would be shifted into the save variable and masked off (giving a 1).
+
+	ICC2.Z would then be set to 1 (thus unchanged), and ICC2.C would be unaffected (ie: 0).
+
+ (7) If interrupts were then restored from state (6) (local_irq_restore):
+
+	ICC2.Z would be set to indicate the result of XOR'ing the saved value (ie: 1) with 1, which
+	gives a result of 0 - thus leaving ICC2.Z set.
+
+	ICC2.C would remain unaffected (ie: 0).
+
+     A TIHI #2 instruction would be used to again assay the current state, but this would do
+     nothing as Z==1.
+
+ (8) If interrupts were then enabled (local_irq_enable):
+
+	ICC2.Z would be cleared. ICC2.C would be left unaffected. Both flags would now be 0.
+
+     A TIHI #2 instruction again issued to assay the current state would then trap as both Z==0
+     [interrupts virtually enabled] and C==0 [interrupts really disabled] would then be true.
+
+ (9) The trap #2 handler would simply enable hardware interrupts (set PSR.PIL to 0), set ICC2.C to
+     1 and return.
+
+(10) Immediately upon returning, the pending interrupt would be taken.
+
+(11) The interrupt handler would take the path of actually processing the interrupt (ICC2.Z is
+     clear, BEQ fails as per step (2)).
+
+(12) The interrupt handler would then set ICC2.C to 1 since hardware interrupts are definitely
+     enabled - or else the kernel wouldn't be here.
+
+(13) On return from the interrupt handler, things would be back to state (1).
+
+This trap (#2) is only available in kernel mode. In user mode it will result in SIGILL.
diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index 90c0fb8d9dc3..d163747d17c0 100644
--- a/arch/frv/Makefile
+++ b/arch/frv/Makefile
@@ -81,7 +81,7 @@ endif
 # - reserve CC3 for use with atomic ops
 # - all the extra registers are dealt with only at context switch time
 CFLAGS		+= -mno-fdpic -mgpr-32 -msoft-float -mno-media
-CFLAGS		+= -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15
+CFLAGS		+= -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15 -ffixed-icc2
 AFLAGS		+= -mno-fdpic
 ASFLAGS		+= -mno-fdpic
 
diff --git a/arch/frv/kernel/break.S b/arch/frv/kernel/break.S
index 33233dc23e29..687c48d62dde 100644
--- a/arch/frv/kernel/break.S
+++ b/arch/frv/kernel/break.S
@@ -200,12 +200,20 @@ __break_step:
 	movsg		bpcsr,gr2
 	sethi.p		%hi(__entry_kernel_external_interrupt),gr3
 	setlo		%lo(__entry_kernel_external_interrupt),gr3
-	subcc		gr2,gr3,gr0,icc0
+	subcc.p		gr2,gr3,gr0,icc0
+	sethi		%hi(__entry_uspace_external_interrupt),gr3
+	setlo.p		%lo(__entry_uspace_external_interrupt),gr3
 	beq		icc0,#2,__break_step_kernel_external_interrupt
-	sethi.p		%hi(__entry_uspace_external_interrupt),gr3
-	setlo		%lo(__entry_uspace_external_interrupt),gr3
-	subcc		gr2,gr3,gr0,icc0
+	subcc.p		gr2,gr3,gr0,icc0
+	sethi		%hi(__entry_kernel_external_interrupt_virtually_disabled),gr3
+	setlo.p		%lo(__entry_kernel_external_interrupt_virtually_disabled),gr3
 	beq		icc0,#2,__break_step_uspace_external_interrupt
+	subcc.p		gr2,gr3,gr0,icc0
+	sethi		%hi(__entry_kernel_external_interrupt_virtual_reenable),gr3
+	setlo.p		%lo(__entry_kernel_external_interrupt_virtual_reenable),gr3
+	beq		icc0,#2,__break_step_kernel_external_interrupt_virtually_disabled
+	subcc		gr2,gr3,gr0,icc0
+	beq		icc0,#2,__break_step_kernel_external_interrupt_virtual_reenable
 
 	LEDS		0x2007,gr2
 
@@ -254,6 +262,9 @@ __break_step_kernel_softprog_interrupt:
 # step through an external interrupt from kernel mode
 	.globl		__break_step_kernel_external_interrupt
 __break_step_kernel_external_interrupt:
+	# deal with virtual interrupt disablement
+	beq		icc2,#0,__break_step_kernel_external_interrupt_virtually_disabled
+
 	sethi.p		%hi(__entry_kernel_external_interrupt_reentry),gr3
 	setlo		%lo(__entry_kernel_external_interrupt_reentry),gr3
 
@@ -294,6 +305,64 @@ __break_return_as_kernel_prologue:
 #endif
 	rett		#1
 
+# we single-stepped into an interrupt handler whilst interrupts were merely virtually disabled
+# need to really disable interrupts, set flag, fix up and return
+__break_step_kernel_external_interrupt_virtually_disabled:
+	movsg		psr,gr2
+	andi		gr2,#~PSR_PIL,gr2
+	ori		gr2,#PSR_PIL_14,gr2	/* debugging interrupts only */
+	movgs		gr2,psr
+
+	ldi		@(gr31,#REG_CCR),gr3
+	movgs		gr3,ccr
+	subcc.p		gr0,gr0,gr0,icc2	/* leave Z set, clear C */
+
+	# exceptions must've been enabled and we must've been in supervisor mode
+	setlos		BPSR_BET|BPSR_BS,gr3
+	movgs		gr3,bpsr
+
+	# return to where the interrupt happened
+	movsg		pcsr,gr2
+	movgs		gr2,bpcsr
+
+	lddi.p		@(gr31,#REG_GR(2)),gr2
+
+	xor		gr31,gr31,gr31
+	movgs		gr0,brr
+#ifdef CONFIG_MMU
+	movsg		scr3,gr31
+#endif
+	rett		#1
+
+# we stepped through into the virtual interrupt reenablement trap
+#
+# we also want to single step anyway, but after fixing up so that we get an event on the
+# instruction after the broken-into exception returns
+	.globl		__break_step_kernel_external_interrupt_virtual_reenable
+__break_step_kernel_external_interrupt_virtual_reenable:
+	movsg		psr,gr2
+	andi		gr2,#~PSR_PIL,gr2
+	movgs		gr2,psr
+
+	ldi		@(gr31,#REG_CCR),gr3
+	movgs		gr3,ccr
+	subicc		gr0,#1,gr0,icc2		/* clear Z, set C */
+
+	# save the adjusted ICC2
+	movsg		ccr,gr3
+	sti		gr3,@(gr31,#REG_CCR)
+
+	# exceptions must've been enabled and we must've been in supervisor mode
+	setlos		BPSR_BET|BPSR_BS,gr3
+	movgs		gr3,bpsr
+
+	# return to where the trap happened
+	movsg		pcsr,gr2
+	movgs		gr2,bpcsr
+
+	# and then process the single step
+	bra		__break_continue
+
 # step through an internal exception from uspace mode
 	.globl		__break_step_uspace_softprog_interrupt
 __break_step_uspace_softprog_interrupt:
diff --git a/arch/frv/kernel/entry-table.S b/arch/frv/kernel/entry-table.S
index 9b9243e2103c..81568acea9cd 100644
--- a/arch/frv/kernel/entry-table.S
+++ b/arch/frv/kernel/entry-table.S
@@ -116,6 +116,8 @@ __break_kerneltrap_fixup_table:
 	.long		__break_step_uspace_external_interrupt
 	.section .trap.kernel
 	.org		\tbr_tt
+	# deal with virtual interrupt disablement
+	beq		icc2,#0,__entry_kernel_external_interrupt_virtually_disabled
 	bra		__entry_kernel_external_interrupt
 	.section .trap.fixup.kernel
 	.org		\tbr_tt >> 2
@@ -259,25 +261,52 @@ __trap_fixup_kernel_data_tlb_miss:
 	.org		TBR_TT_TRAP0
 	.rept		127
 	bra		__entry_uspace_softprog_interrupt
-	bra		__break_step_uspace_softprog_interrupt
-	.long		0,0
+	.long		0,0,0
 	.endr
 	.org		TBR_TT_BREAK
 	bra		__entry_break
 	.long		0,0,0
 
+	.section	.trap.fixup.user
+	.org		TBR_TT_TRAP0 >> 2
+	.rept		127
+	.long		__break_step_uspace_softprog_interrupt
+	.endr
+	.org		TBR_TT_BREAK >> 2
+	.long		0
+
 	# miscellaneous kernel mode entry points
 	.section	.trap.kernel
 	.org		TBR_TT_TRAP0
-	.rept		127
 	bra		__entry_kernel_softprog_interrupt
-	bra		__break_step_kernel_softprog_interrupt
-	.long		0,0
+	.org		TBR_TT_TRAP1
+	bra		__entry_kernel_softprog_interrupt
+
+	# trap #2 in kernel - reenable interrupts
+	.org		TBR_TT_TRAP2
+	bra		__entry_kernel_external_interrupt_virtual_reenable
+
+	# miscellaneous kernel traps
+	.org		TBR_TT_TRAP3
+	.rept		124
+	bra		__entry_kernel_softprog_interrupt
+	.long		0,0,0
 	.endr
 	.org		TBR_TT_BREAK
 	bra		__entry_break
 	.long		0,0,0
 
+	.section	.trap.fixup.kernel
+	.org		TBR_TT_TRAP0 >> 2
+	.long		__break_step_kernel_softprog_interrupt
+	.long		__break_step_kernel_softprog_interrupt
+	.long		__break_step_kernel_external_interrupt_virtual_reenable
+	.rept		124
+	.long		__break_step_kernel_softprog_interrupt
+	.endr
+	.org		TBR_TT_BREAK >> 2
+	.long		0
+
 	# miscellaneous debug mode entry points
 	.section	.trap.break
 	.org		TBR_TT_BREAK
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index c69d499d28cf..1d21c8d34d8a 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -141,7 +141,10 @@ __entry_uspace_external_interrupt_reentry:
 
 	movsg		gner0,gr4
 	movsg		gner1,gr5
-	stdi		gr4,@(gr28,#REG_GNER0)
+	stdi.p		gr4,@(gr28,#REG_GNER0)
+
+	# interrupts start off fully disabled in the interrupt handler
+	subcc		gr0,gr0,gr0,icc2		/* set Z and clear C */
 
 	# set up kernel global registers
 	sethi.p		%hi(__kernel_current_task),gr5
@@ -193,9 +196,8 @@ __entry_uspace_external_interrupt_reentry:
         .type		__entry_kernel_external_interrupt,@function
 __entry_kernel_external_interrupt:
 	LEDS		0x6210
-
-	sub		sp,gr15,gr31
-	LEDS32
+//	sub		sp,gr15,gr31
+//	LEDS32
 
 	# set up the stack pointer
 	or.p		sp,gr0,gr30
@@ -231,7 +233,10 @@ __entry_kernel_external_interrupt_reentry:
 	stdi		gr24,@(gr28,#REG_GR(24))
 	stdi		gr26,@(gr28,#REG_GR(26))
 	sti		gr29,@(gr28,#REG_GR(29))
-	stdi		gr30,@(gr28,#REG_GR(30))
+	stdi.p		gr30,@(gr28,#REG_GR(30))
+
+	# note virtual interrupts will be fully enabled upon return
+	subicc		gr0,#1,gr0,icc2			/* clear Z, set C */
 
 	movsg		tbr ,gr20
 	movsg		psr ,gr22
@@ -267,7 +272,10 @@ __entry_kernel_external_interrupt_reentry:
 
 	movsg		gner0,gr4
 	movsg		gner1,gr5
-	stdi		gr4,@(gr28,#REG_GNER0)
+	stdi.p		gr4,@(gr28,#REG_GNER0)
+
+	# interrupts start off fully disabled in the interrupt handler
+	subcc		gr0,gr0,gr0,icc2			/* set Z and clear C */
 
 	# set the return address
 	sethi.p		%hi(__entry_return_from_kernel_interrupt),gr4
@@ -291,6 +299,45 @@ __entry_kernel_external_interrupt_reentry:
 
 	.size		__entry_kernel_external_interrupt,.-__entry_kernel_external_interrupt
 
+###############################################################################
+#
+# deal with interrupts that were actually virtually disabled
+# - we need to really disable them, flag the fact and return immediately
+# - if you change this, you must alter break.S also
+#
+###############################################################################
+	.balign		L1_CACHE_BYTES
+	.globl		__entry_kernel_external_interrupt_virtually_disabled
+	.type		__entry_kernel_external_interrupt_virtually_disabled,@function
+__entry_kernel_external_interrupt_virtually_disabled:
+	movsg		psr,gr30
+	andi		gr30,#~PSR_PIL,gr30
+	ori		gr30,#PSR_PIL_14,gr30		; debugging interrupts only
+	movgs		gr30,psr
+	subcc		gr0,gr0,gr0,icc2		; leave Z set, clear C
+	rett		#0
+
+	.size		__entry_kernel_external_interrupt_virtually_disabled,.-__entry_kernel_external_interrupt_virtually_disabled
+
+###############################################################################
+#
+# deal with re-enablement of interrupts that were pending when virtually re-enabled
+# - set ICC2.C, re-enable the real interrupts and return
+# - we can clear ICC2.Z because we shouldn't be here if it's not 0 [due to TIHI]
+# - if you change this, you must alter break.S also
+#
+###############################################################################
+	.balign		L1_CACHE_BYTES
+	.globl		__entry_kernel_external_interrupt_virtual_reenable
+	.type		__entry_kernel_external_interrupt_virtual_reenable,@function
+__entry_kernel_external_interrupt_virtual_reenable:
+	movsg		psr,gr30
+	andi		gr30,#~PSR_PIL,gr30		; re-enable interrupts
+	movgs		gr30,psr
+	subicc		gr0,#1,gr0,icc2			; clear Z, set C
+	rett		#0
+
+	.size		__entry_kernel_external_interrupt_virtual_reenable,.-__entry_kernel_external_interrupt_virtual_reenable
 
 ###############################################################################
 #
@@ -335,6 +382,7 @@ __entry_uspace_softprog_interrupt_reentry:
 
 	sethi.p		%hi(__entry_return_from_user_exception),gr23
 	setlo		%lo(__entry_return_from_user_exception),gr23
+
 	bra		__entry_common
 
 	.size		__entry_uspace_softprog_interrupt,.-__entry_uspace_softprog_interrupt
@@ -495,7 +543,10 @@ __entry_common:
 
 	movsg		gner0,gr4
 	movsg		gner1,gr5
-	stdi		gr4,@(gr28,#REG_GNER0)
+	stdi.p		gr4,@(gr28,#REG_GNER0)
+
+	# set up virtual interrupt disablement
+	subicc		gr0,#1,gr0,icc2			/* clear Z flag, set C flag */
 
 	# set up kernel global registers
 	sethi.p		%hi(__kernel_current_task),gr5
diff --git a/arch/frv/kernel/head.S b/arch/frv/kernel/head.S
index c73b4fe9f6ca..29a5265489b7 100644
--- a/arch/frv/kernel/head.S
+++ b/arch/frv/kernel/head.S
@@ -513,6 +513,9 @@ __head_mmu_enabled:
 	movgs		gr0,ccr
 	movgs		gr0,cccr
 
+	# initialise the virtual interrupt handling
+	subcc		gr0,gr0,gr0,icc2		/* set Z, clear C */
+
 #ifdef CONFIG_MMU
 	movgs		gr3,scr2
 	movgs		gr3,scr3
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index 59580c59c62c..27ab4c30aac6 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -287,18 +287,11 @@ asmlinkage void do_IRQ(void)
 	struct irq_source *source;
 	int level, cpu;
 
+	irq_enter();
+
 	level = (__frame->tbr >> 4) & 0xf;
 	cpu = smp_processor_id();
 
-#if 0
-	{
-		static u32 irqcount;
-		*(volatile u32 *) 0xe1200004 = ~((irqcount++ << 8) | level);
-		*(volatile u16 *) 0xffc00100 = (u16) ~0x9999;
-		mb();
-	}
-#endif
-
 	if ((unsigned long) __frame - (unsigned long) (current + 1) < 512)
 		BUG();
 
@@ -308,40 +301,12 @@ asmlinkage void do_IRQ(void)
 
 	kstat_this_cpu.irqs[level]++;
 
-	irq_enter();
-
 	for (source = frv_irq_levels[level].sources; source; source = source->next)
 		source->doirq(source);
 
-	irq_exit();
-
 	__clr_MASK(level);
 
-	/* only process softirqs if we didn't interrupt another interrupt handler */
-	if ((__frame->psr & PSR_PIL) == PSR_PIL_0)
-		if (local_softirq_pending())
-			do_softirq();
-
-#ifdef CONFIG_PREEMPT
-	local_irq_disable();
-	while (--current->preempt_count == 0) {
-		if (!(__frame->psr & PSR_S) ||
-		    current->need_resched == 0 ||
-		    in_interrupt())
-			break;
-		current->preempt_count++;
-		local_irq_enable();
-		preempt_schedule();
-		local_irq_disable();
-	}
-#endif
-
-#if 0
-	{
-		*(volatile u16 *) 0xffc00100 = (u16) ~0x6666;
-		mb();
-	}
-#endif
+	irq_exit();
 
 } /* end do_IRQ() */
 
diff --git a/include/asm-frv/spr-regs.h b/include/asm-frv/spr-regs.h
index ef472f058d9c..c2a541ef828d 100644
--- a/include/asm-frv/spr-regs.h
+++ b/include/asm-frv/spr-regs.h
@@ -98,6 +98,7 @@
 #define TBR_TT_TRAP0		(0x80 << 4)
 #define TBR_TT_TRAP1		(0x81 << 4)
 #define TBR_TT_TRAP2		(0x82 << 4)
+#define TBR_TT_TRAP3		(0x83 << 4)
 #define TBR_TT_TRAP126		(0xfe << 4)
 #define TBR_TT_BREAK		(0xff << 4)
 
diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h
index d2aea70a5f64..f72ff0c4dc0b 100644
--- a/include/asm-frv/system.h
+++ b/include/asm-frv/system.h
@@ -40,8 +40,84 @@ do {									\
 
 /*
  * interrupt flag manipulation
+ * - use virtual interrupt management since touching the PSR is slow
+ *   - ICC2.Z: T if interrupts virtually disabled
+ *   - ICC2.C: F if interrupts really disabled
+ * - if Z==1 upon interrupt:
+ *   - C is set to 0
+ *   - interrupts are really disabled
+ *   - entry.S returns immediately
+ * - uses TIHI (TRAP if Z==0 && C==0) #2 to really reenable interrupts
+ *   - if taken, the trap:
+ *     - sets ICC2.C
+ *     - enables interrupts
  */
-#define local_irq_disable()				\
+#define local_irq_disable()					\
+do {								\
+	/* set Z flag, but don't change the C flag */		\
+	asm volatile("	andcc	gr0,gr0,gr0,icc2	\n"	\
+		     :						\
+		     :						\
+		     : "memory", "icc2"				\
+		     );						\
+} while(0)
+
+#define local_irq_enable()					\
+do {								\
+	/* clear Z flag and then test the C flag */		\
+	asm volatile("  oricc	gr0,#1,gr0,icc2		\n"	\
+		     "	tihi	icc2,gr0,#2		\n"	\
+		     :						\
+		     :						\
+		     : "memory", "icc2"				\
+		     );						\
+} while(0)
+
+#define local_save_flags(flags)					\
+do {								\
+	typecheck(unsigned long, flags);			\
+	asm volatile("movsg ccr,%0"				\
+		     : "=r"(flags)				\
+		     :						\
+		     : "memory");				\
+								\
+	/* shift ICC2.Z to bit 0 */				\
+	flags >>= 26;						\
+								\
+	/* make flags 1 if interrupts disabled, 0 otherwise */	\
+	flags &= 1UL;						\
+} while(0)
+
+#define irqs_disabled() \
+	({unsigned long flags; local_save_flags(flags); flags; })
+
+#define	local_irq_save(flags)			\
+do {						\
+	typecheck(unsigned long, flags);	\
+	local_save_flags(flags);		\
+	local_irq_disable();			\
+} while(0)
+
+#define	local_irq_restore(flags)					\
+do {									\
+	typecheck(unsigned long, flags);				\
+									\
+	/* load the Z flag by turning 1 if disabled into 0 if disabled	\
+	 * and thus setting the Z flag but not the C flag */		\
+	asm volatile("  xoricc	%0,#1,gr0,icc2		\n"		\
+		     /* then test Z=0 and C=0 */			\
+		     "	tihi	icc2,gr0,#2		\n"		\
+		     :							\
+		     : "r"(flags)					\
+		     : "memory", "icc2"					\
+		     );							\
+									\
+} while(0)
+
+/*
+ * real interrupt flag manipulation
+ */
+#define __local_irq_disable()				\
 do {							\
 	unsigned long psr;				\
 	asm volatile("	movsg	psr,%0		\n"	\
@@ -53,7 +129,7 @@ do {							\
 		     : "memory");			\
 } while(0)
 
-#define local_irq_enable()				\
+#define __local_irq_enable()				\
 do {							\
 	unsigned long psr;				\
 	asm volatile("	movsg	psr,%0		\n"	\
@@ -64,7 +140,7 @@ do {							\
 		     : "memory");			\
 } while(0)
 
-#define local_save_flags(flags)			\
+#define __local_save_flags(flags)		\
 do {						\
 	typecheck(unsigned long, flags);	\
 	asm("movsg psr,%0"			\
@@ -73,7 +149,7 @@ do {						\
 	    : "memory");			\
 } while(0)
 
-#define	local_irq_save(flags)				\
+#define	__local_irq_save(flags)				\
 do {							\
 	unsigned long npsr;				\
 	typecheck(unsigned long, flags);		\
@@ -86,7 +162,7 @@ do {							\
 		     : "memory");			\
 } while(0)
 
-#define	local_irq_restore(flags)			\
+#define	__local_irq_restore(flags)			\
 do {							\
 	typecheck(unsigned long, flags);		\
 	asm volatile("	movgs	%0,psr		\n"	\
@@ -95,7 +171,7 @@ do {							\
 		     : "memory");			\
 } while(0)
 
-#define irqs_disabled() \
+#define __irqs_disabled() \
 	((__get_PSR() & PSR_PIL) >= PSR_PIL_14)
 
 /*
-- 
cgit v1.2.3


From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 01:34:23 -0800
Subject: [NETFILTER]: Fix xfrm lookup after SNAT

To find out if a packet needs to be handled by IPsec after SNAT, packets
are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This
breaks SNAT of non-unicast packets to non-local addresses because the
packet is routed as incoming packet and no neighbour entry is bound to the
dst_entry. In general, it seems to be a bad idea to replace the dst_entry
after the packet was already sent to the output routine because its state
might not match what's expected.

This patch changes the xfrm lookup in POST_ROUTING to re-use the original
dst_entry without routing the packet again. This means no policy routing
can be used for transport mode transforms (which keep the original route)
when packets are SNATed to match the policy, but it looks like the best
we can do for now.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h         |  2 +-
 net/ipv4/netfilter.c                   | 41 ++++++++++++++++++++++++++++++++++
 net/ipv4/netfilter/ip_nat_standalone.c |  6 ++---
 3 files changed, 45 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index fdc4a9527343..43c09d790b83 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -79,7 +79,7 @@ enum nf_ip_hook_priorities {
 
 #ifdef __KERNEL__
 extern int ip_route_me_harder(struct sk_buff **pskb);
-
+extern int ip_xfrm_me_harder(struct sk_buff **pskb);
 #endif /*__KERNEL__*/
 
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 52a3d7c57907..ed42cdc57cd9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
+#ifdef CONFIG_XFRM
+int ip_xfrm_me_harder(struct sk_buff **pskb)
+{
+	struct flowi fl;
+	unsigned int hh_len;
+	struct dst_entry *dst;
+
+	if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+		return 0;
+	if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+		return -1;
+
+	dst = (*pskb)->dst;
+	if (dst->xfrm)
+		dst = ((struct xfrm_dst *)dst)->route;
+	dst_hold(dst);
+
+	if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+		return -1;
+
+	dst_release((*pskb)->dst);
+	(*pskb)->dst = dst;
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = (*pskb)->dst->dev->hard_header_len;
+	if (skb_headroom(*pskb) < hh_len) {
+		struct sk_buff *nskb;
+
+		nskb = skb_realloc_headroom(*pskb, hh_len);
+		if (!nskb)
+			return -1;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ip_xfrm_me_harder);
+#endif
+
 void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(ip_nat_decode_session);
 
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 92c54999a19d..7c3f7d380240 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+#ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN
 	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.src.ip !=
 		    ct->tuplehash[!dir].tuple.dst.ip
-#ifdef CONFIG_XFRM
 		    || ct->tuplehash[dir].tuple.src.u.all !=
 		       ct->tuplehash[!dir].tuple.dst.u.all
-#endif
 		    )
-			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
 	}
+#endif
 	return ret;
 }
 
-- 
cgit v1.2.3


From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:50:10 +0300
Subject: [PATCH] fix zap_thread's ptrace related problems

1. The tracee can go from ptrace_stop() to do_signal_stop()
   after __ptrace_unlink(p).

2. It is unsafe to __ptrace_unlink(p) while p->parent may wait
   for tasklist_lock in ptrace_detach().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c              |  2 +-
 include/linux/ptrace.h |  1 +
 kernel/ptrace.c        | 25 +++++++++++++++----------
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 055378d2513e..0e1c95074d42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm)
 		do_each_thread(g,p) {
 			if (mm == p->mm && p != tsk &&
 			    p->ptrace && p->parent->mm == mm) {
-				__ptrace_unlink(p);
+				__ptrace_detach(p, 0);
 			}
 		} while_each_thread(g,p);
 		write_unlock_irq(&tasklist_lock);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 9d5cd106b344..0d36750fc0f1 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void __ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
 extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d2cf144d0af5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
  */
 void __ptrace_unlink(task_t *child)
 {
-	if (!child->ptrace)
-		BUG();
+	BUG_ON(!child->ptrace);
+
 	child->ptrace = 0;
 	if (!list_empty(&child->ptrace_list)) {
 		list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
 	return retval;
 }
 
+void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+	child->exit_code = data;
+	/* .. re-parent .. */
+	__ptrace_unlink(child);
+	/* .. and wake it up. */
+	if (child->exit_state != EXIT_ZOMBIE)
+		wake_up_process(child);
+}
+
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	if (!valid_signal(data))
-		return	-EIO;
+		return -EIO;
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
 
-	/* .. re-parent .. */
-	child->exit_code = data;
-
 	write_lock_irq(&tasklist_lock);
-	__ptrace_unlink(child);
-	/* .. and wake it up. */
-	if (child->exit_state != EXIT_ZOMBIE)
-		wake_up_process(child);
+	if (child->ptrace)
+		__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;
-- 
cgit v1.2.3


From 8ed9b2c7a804335004e4bd3b4c6989c5b6bc243f Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Mon, 13 Feb 2006 05:29:57 -0500
Subject: [IA64-SGI] sn2 minor fixes and cleanups

General SN2 code cleanup:
 - Do not initialize global variables to zero
 - Use kzalloc instead of kmalloc+memset
 - Check kmalloc return values
 - Do not obfuscate spin lock calls
 - Remove some unused code
 - Various formatting cleanups

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/io_init.c           | 96 ++++++++++++++-------------------
 arch/ia64/sn/kernel/sn2/prominfo_proc.c | 25 ++++-----
 arch/ia64/sn/kernel/sn2/sn2_smp.c       | 35 ++++++------
 arch/ia64/sn/kernel/sn2/sn_proc_fs.c    | 22 ++++----
 arch/ia64/sn/kernel/tiocx.c             |  4 +-
 arch/ia64/sn/pci/pci_dma.c              | 16 +++---
 arch/ia64/sn/pci/pcibr/pcibr_ate.c      | 29 +++-------
 arch/ia64/sn/pci/pcibr/pcibr_dma.c      | 14 ++---
 arch/ia64/sn/pci/pcibr/pcibr_provider.c |  9 ++--
 include/asm-ia64/sn/bte.h               |  6 +--
 include/asm-ia64/sn/pcibr_provider.h    | 14 +----
 include/asm-ia64/sn/sn_feature_sets.h   |  3 --
 12 files changed, 111 insertions(+), 162 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 3437c2390429..dfb3f2902379 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -23,6 +23,10 @@
 #include "xtalk/hubdev.h"
 #include "xtalk/xwidgetdev.h"
 
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+
 static struct list_head sn_sysdata_list;
 
 /* sysdata list struct */
@@ -40,12 +44,12 @@ struct brick {
 	struct slab_info slab_info[MAX_SLABS + 1];
 };
 
-int sn_ioif_inited = 0;		/* SN I/O infrastructure initialized? */
+int sn_ioif_inited;		/* SN I/O infrastructure initialized? */
 
 struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
 
-static int max_segment_number = 0; /* Default highest segment number */
-static int max_pcibus_number = 255; /* Default highest pci bus number */
+static int max_segment_number;		 /* Default highest segment number */
+static int max_pcibus_number = 255;	/* Default highest pci bus number */
 
 /*
  * Hooks and struct for unsupported pci providers
@@ -84,7 +88,6 @@ static inline u64
 sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
 			     u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
@@ -94,7 +97,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
 			(u64) nasid, (u64) widget_num,
 			(u64) device_num, (u64) address, 0, 0, 0);
 	return ret_stuff.status;
-
 }
 
 /*
@@ -102,7 +104,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
  */
 static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
@@ -118,7 +119,6 @@ static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
  */
 static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 {
-
 	struct ia64_sal_retval ret_stuff;
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
@@ -215,7 +215,7 @@ static void __init sn_fixup_ionodes(void)
 	struct hubdev_info *hubdev;
 	u64 status;
 	u64 nasid;
-	int i, widget, device;
+	int i, widget, device, size;
 
 	/*
 	 * Get SGI Specific HUB chipset information.
@@ -251,48 +251,37 @@ static void __init sn_fixup_ionodes(void)
 		if (!hubdev->hdi_flush_nasid_list.widget_p)
 			continue;
 
+		size = (HUB_WIDGET_ID_MAX + 1) *
+			sizeof(struct sn_flush_device_kernel *);
 		hubdev->hdi_flush_nasid_list.widget_p =
-		    kmalloc((HUB_WIDGET_ID_MAX + 1) *
-			    sizeof(struct sn_flush_device_kernel *),
-			    GFP_KERNEL);
-		memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
-		       (HUB_WIDGET_ID_MAX + 1) *
-		       sizeof(struct sn_flush_device_kernel *));
+			kzalloc(size, GFP_KERNEL);
+		if (!hubdev->hdi_flush_nasid_list.widget_p)
+			BUG();
 
 		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
-			sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET *
-						         sizeof(struct
-						        sn_flush_device_kernel),
-						        GFP_KERNEL);
+			size = DEV_PER_WIDGET *
+				sizeof(struct sn_flush_device_kernel);
+			sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
 			if (!sn_flush_device_kernel)
 				BUG();
-			memset(sn_flush_device_kernel, 0x0,
-			       DEV_PER_WIDGET *
-			       sizeof(struct sn_flush_device_kernel));
 
 			dev_entry = sn_flush_device_kernel;
 			for (device = 0; device < DEV_PER_WIDGET;
 			     device++,dev_entry++) {
-				dev_entry->common = kmalloc(sizeof(struct
-					      	        sn_flush_device_common),
-					                    GFP_KERNEL);
+				size = sizeof(struct sn_flush_device_common);
+				dev_entry->common = kzalloc(size, GFP_KERNEL);
 				if (!dev_entry->common)
 					BUG();
-				memset(dev_entry->common, 0x0, sizeof(struct
-					     	       sn_flush_device_common));
 
 				if (sn_prom_feature_available(
 						       PRF_DEVICE_FLUSH_LIST))
 					status = sal_get_device_dmaflush_list(
-									  nasid,
-									 widget,
-								       	 device,
-						      (u64)(dev_entry->common));
+						     nasid, widget, device,
+						     (u64)(dev_entry->common));
 				else
 					status = sn_device_fixup_war(nasid,
-								     widget,
-							    	     device,
-							     dev_entry->common);
+						     widget, device,
+						     dev_entry->common);
 				if (status != SALRET_OK)
 					panic("SAL call failed: %s\n",
 					      ia64_sal_strerror(status));
@@ -383,13 +372,12 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
 
 	pci_dev_get(dev); /* for the sysdata pointer */
 	pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
-	if (pcidev_info <= 0)
+	if (!pcidev_info)
 		BUG();		/* Cannot afford to run out of memory */
 
-	sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
-	if (sn_irq_info <= 0)
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!sn_irq_info)
 		BUG();		/* Cannot afford to run out of memory */
-	memset(sn_irq_info, 0, sizeof(struct sn_irq_info));
 
 	/* Call to retrieve pci device information needed by kernel. */
 	status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, 
@@ -482,13 +470,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
  */
 void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 {
-	int status = 0;
+	int status;
 	int nasid, cnode;
 	struct pci_controller *controller;
 	struct sn_pci_controller *sn_controller;
 	struct pcibus_bussoft *prom_bussoft_ptr;
 	struct hubdev_info *hubdev_info;
-	void *provider_soft = NULL;
+	void *provider_soft;
 	struct sn_pcibus_provider *provider;
 
  	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
@@ -535,6 +523,8 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 	bus->sysdata = controller;
 	if (provider->bus_fixup)
 		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
+	else
+		provider_soft = NULL;
 
 	if (provider_soft == NULL) {
 		/* fixup failed or not applicable */
@@ -638,13 +628,8 @@ void sn_bus_free_sysdata(void)
 
 static int __init sn_pci_init(void)
 {
-	int i = 0;
-	int j = 0;
+	int i, j;
 	struct pci_dev *pci_dev = NULL;
-	extern void sn_init_cpei_timer(void);
-#ifdef CONFIG_PROC_FS
-	extern void register_sn_procfs(void);
-#endif
 
 	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
 		return 0;
@@ -700,32 +685,29 @@ static int __init sn_pci_init(void)
  */
 void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
 {
-
 	struct hubdev_info *hubdev_info;
+	int size;
+	pg_data_t *pg;
+
+	size = sizeof(struct hubdev_info);
 
 	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
-		hubdev_info =
-		    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0),
-							     sizeof(struct
-								    hubdev_info));
+		pg = NODE_DATA(0);
 	else
-		hubdev_info =
-		    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node),
-							     sizeof(struct
-								    hubdev_info));
-	npda->pdinfo = (void *)hubdev_info;
+		pg = NODE_DATA(node);
 
+	hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+	npda->pdinfo = (void *)hubdev_info;
 }
 
 geoid_t
 cnodeid_get_geoid(cnodeid_t cnode)
 {
-
 	struct hubdev_info *hubdev;
 
 	hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
 	return hubdev->hdi_geoid;
-
 }
 
 subsys_initcall(sn_pci_init);
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
index 81c63b2f8ae9..6ae276d5d50c 100644
--- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * Module to export the system's Firmware Interface Tables, including
  * PROM revision numbers and banners, in /proc
@@ -190,7 +190,7 @@ static int
 read_version_entry(char *page, char **start, off_t off, int count, int *eof,
 		   void *data)
 {
-	int len = 0;
+	int len;
 
 	/* data holds the NASID of the node */
 	len = dump_version(page, (unsigned long)data);
@@ -202,7 +202,7 @@ static int
 read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
 	       void *data)
 {
-	int len = 0;
+	int len;
 
 	/* data holds the NASID of the node */
 	len = dump_fit(page, (unsigned long)data);
@@ -229,13 +229,16 @@ int __init prominfo_init(void)
 	struct proc_dir_entry *p;
 	cnodeid_t cnodeid;
 	unsigned long nasid;
+	int size;
 	char name[NODE_NAME_LEN];
 
 	if (!ia64_platform_is("sn2"))
 		return 0;
 
-	proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *),
-			       GFP_KERNEL);
+	size = num_online_nodes() * sizeof(struct proc_dir_entry *);
+	proc_entries = kzalloc(size, GFP_KERNEL);
+	if (!proc_entries)
+		return -ENOMEM;
 
 	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
 
@@ -244,14 +247,12 @@ int __init prominfo_init(void)
 		sprintf(name, "node%d", cnodeid);
 		*entp = proc_mkdir(name, sgi_prominfo_entry);
 		nasid = cnodeid_to_nasid(cnodeid);
-		p = create_proc_read_entry(
-			"fit", 0, *entp, read_fit_entry,
-			(void *)nasid);
+		p = create_proc_read_entry("fit", 0, *entp, read_fit_entry,
+					   (void *)nasid);
 		if (p)
 			p->owner = THIS_MODULE;
-		p = create_proc_read_entry(
-			"version", 0, *entp, read_version_entry,
-			(void *)nasid);
+		p = create_proc_read_entry("version", 0, *entp,
+					   read_version_entry, (void *)nasid);
 		if (p)
 			p->owner = THIS_MODULE;
 		entp++;
@@ -263,7 +264,7 @@ int __init prominfo_init(void)
 void __exit prominfo_exit(void)
 {
 	struct proc_dir_entry **entp;
-	unsigned cnodeid;
+	unsigned int cnodeid;
 	char name[NODE_NAME_LEN];
 
 	entp = proc_entries;
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index f153a4c35c70..24eefb2fc55f 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -46,8 +46,14 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats);
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
-void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned long *, unsigned long,
-	volatile unsigned long *, unsigned long);
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long,
+			       volatile unsigned long *, unsigned long);
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+			  volatile unsigned long *, unsigned long,
+			  volatile unsigned long *, unsigned long);
 
 /*
  * Note: some is the following is captured here to make degugging easier
@@ -59,16 +65,6 @@ void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned lon
 #define reset_max_active_on_deadlock()	1
 #define PTC_LOCK(sh1)			((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
 
-static inline void ptc_lock(int sh1, unsigned long *flagp)
-{
-	spin_lock_irqsave(PTC_LOCK(sh1), *flagp);
-}
-
-static inline void ptc_unlock(int sh1, unsigned long flags)
-{
-	spin_unlock_irqrestore(PTC_LOCK(sh1), flags);
-}
-
 struct ptc_stats {
 	unsigned long ptc_l;
 	unsigned long change_rid;
@@ -82,6 +78,8 @@ struct ptc_stats {
 	unsigned long shub_ptc_flushes_not_my_mm;
 };
 
+#define sn2_ptctest	0
+
 static inline unsigned long wait_piowc(void)
 {
 	volatile unsigned long *piows;
@@ -200,7 +198,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 	max_active = max_active_pio(shub1);
 
 	itc = ia64_get_itc();
-	ptc_lock(shub1, &flags);
+	spin_lock_irqsave(PTC_LOCK(shub1), flags);
 	itc2 = ia64_get_itc();
 
 	__get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
@@ -258,7 +256,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 		ia64_srlz_d();
 	}
 
-	ptc_unlock(shub1, flags);
+	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
 
 	preempt_enable();
 }
@@ -270,11 +268,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
  * TLB flush transaction.  The recovery sequence is somewhat tricky & is
  * coded in assembly language.
  */
-void sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
-	volatile unsigned long *ptc1, unsigned long data1)
+
+void
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+			  volatile unsigned long *ptc0, unsigned long data0,
+			  volatile unsigned long *ptc1, unsigned long data1)
 {
-	extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
-	        volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
 	short nasid, i;
 	unsigned long *piows, zeroval, n;
 
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index a06719d752a0..c686d9c12f7b 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -6,11 +6,11 @@
  * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #include <linux/config.h>
-#include <asm/uaccess.h>
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <asm/uaccess.h>
 #include <asm/sn/sn_sal.h>
 
 static int partition_id_show(struct seq_file *s, void *p)
@@ -90,10 +90,10 @@ static int coherence_id_open(struct inode *inode, struct file *file)
 	return single_open(file, coherence_id_show, NULL);
 }
 
-static struct proc_dir_entry *sn_procfs_create_entry(
-	const char *name, struct proc_dir_entry *parent,
-	int (*openfunc)(struct inode *, struct file *),
-	int (*releasefunc)(struct inode *, struct file *))
+static struct proc_dir_entry
+*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent,
+			int (*openfunc)(struct inode *, struct file *),
+			int (*releasefunc)(struct inode *, struct file *))
 {
 	struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
 
@@ -126,24 +126,24 @@ void register_sn_procfs(void)
 		return;
 
 	sn_procfs_create_entry("partition_id", sgi_proc_dir,
-		partition_id_open, single_release);
+			       partition_id_open, single_release);
 
 	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
-		system_serial_number_open, single_release);
+			       system_serial_number_open, single_release);
 
 	sn_procfs_create_entry("licenseID", sgi_proc_dir, 
-		licenseID_open, single_release);
+			       licenseID_open, single_release);
 
 	e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, 
-		sn_force_interrupt_open, single_release);
+				   sn_force_interrupt_open, single_release);
 	if (e) 
 		e->proc_fops->write = sn_force_interrupt_write_proc;
 
 	sn_procfs_create_entry("coherence_id", sgi_proc_dir, 
-		coherence_id_open, single_release);
+			       coherence_id_open, single_release);
 	
 	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
-		sn_topology_open, sn_topology_release);
+			       sn_topology_open, sn_topology_release);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index d263d3e8fbb9..8a56f8b5ffa2 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -284,12 +284,10 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
 	if ((nasid & 1) == 0)
 		return NULL;
 
-	sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL);
+	sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL);
 	if (sn_irq_info == NULL)
 		return NULL;
 
-	memset(sn_irq_info, 0x0, sn_irq_size);
-
 	status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq,
 				  req_nasid, slice);
 	if (status) {
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 5a36292388eb..b4b84c269210 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -335,10 +335,10 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
 	 */
 
 	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
-		pci_domain_nr(bus), bus->number,
-		0, /* io */
-		0, /* read */
-		port, size, __pa(val));
+		 pci_domain_nr(bus), bus->number,
+		 0, /* io */
+		 0, /* read */
+		 port, size, __pa(val));
 
 	if (isrv.status == 0)
 		return size;
@@ -381,10 +381,10 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	 */
 
 	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
-		pci_domain_nr(bus), bus->number,
-		0, /* io */
-		1, /* write */
-		port, size, __pa(&val));
+		 pci_domain_nr(bus), bus->number,
+		 0, /* io */
+		 1, /* write */
+		 port, size, __pa(&val));
 
 	if (isrv.status == 0)
 		return size;
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index aa3fa5152a32..1f0253bfe0a0 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/types.h>
@@ -12,7 +12,7 @@
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
 
-int pcibr_invalidate_ate = 0;	/* by default don't invalidate ATE on free */
+int pcibr_invalidate_ate;	/* by default don't invalidate ATE on free */
 
 /*
  * mark_ate: Mark the ate as either free or inuse.
@@ -20,14 +20,12 @@ int pcibr_invalidate_ate = 0;	/* by default don't invalidate ATE on free */
 static void mark_ate(struct ate_resource *ate_resource, int start, int number,
 		     u64 value)
 {
-
 	u64 *ate = ate_resource->ate;
 	int index;
 	int length = 0;
 
 	for (index = start; length < number; index++, length++)
 		ate[index] = value;
-
 }
 
 /*
@@ -37,7 +35,6 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number,
 static int find_free_ate(struct ate_resource *ate_resource, int start,
 			 int count)
 {
-
 	u64 *ate = ate_resource->ate;
 	int index;
 	int start_free;
@@ -70,12 +67,10 @@ static int find_free_ate(struct ate_resource *ate_resource, int start,
 static inline void free_ate_resource(struct ate_resource *ate_resource,
 				     int start)
 {
-
 	mark_ate(ate_resource, start, ate_resource->ate[start], 0);
 	if ((ate_resource->lowest_free_index > start) ||
 	    (ate_resource->lowest_free_index < 0))
 		ate_resource->lowest_free_index = start;
-
 }
 
 /*
@@ -84,7 +79,6 @@ static inline void free_ate_resource(struct ate_resource *ate_resource,
 static inline int alloc_ate_resource(struct ate_resource *ate_resource,
 				     int ate_needed)
 {
-
 	int start_index;
 
 	/*
@@ -118,19 +112,12 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource,
  */
 int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count)
 {
-	int status = 0;
-	u64 flag;
+	int status;
+	unsigned long flags;
 
-	flag = pcibr_lock(pcibus_info);
+	spin_lock_irqsave(&pcibus_info->pbi_lock, flags);
 	status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count);
-
-	if (status < 0) {
-		/* Failed to allocate */
-		pcibr_unlock(pcibus_info, flag);
-		return -1;
-	}
-
-	pcibr_unlock(pcibus_info, flag);
+	spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags);
 
 	return status;
 }
@@ -182,7 +169,7 @@ void pcibr_ate_free(struct pcibus_info *pcibus_info, int index)
 		ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V));
 	}
 
-	flags = pcibr_lock(pcibus_info);
+	spin_lock_irqsave(&pcibus_info->pbi_lock, flags);
 	free_ate_resource(&pcibus_info->pbi_int_ate_resource, index);
-	pcibr_unlock(pcibus_info, flags);
+	spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags);
 }
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 54ce5b7ceed2..9f86bb6519aa 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -137,14 +137,12 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr,
 		pci_addr |= PCI64_ATTR_VIRTUAL;
 
 	return pci_addr;
-
 }
 
 static dma_addr_t
 pcibr_dmatrans_direct32(struct pcidev_info * info,
 			u64 paddr, size_t req_size, u64 flags)
 {
-
 	struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
 	struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
 	    pdi_pcibus_info;
@@ -171,7 +169,6 @@ pcibr_dmatrans_direct32(struct pcidev_info * info,
 	}
 
 	return PCI32_DIRECT_BASE | offset;
-
 }
 
 /*
@@ -218,9 +215,8 @@ void sn_dma_flush(u64 addr)
 	u64 flags;
 	u64 itte;
 	struct hubdev_info *hubinfo;
-	volatile struct sn_flush_device_kernel *p;
-	volatile struct sn_flush_device_common *common;
-
+	struct sn_flush_device_kernel *p;
+	struct sn_flush_device_common *common;
 	struct sn_flush_nasid_entry *flush_nasid_list;
 
 	if (!sn_ioif_inited)
@@ -310,8 +306,7 @@ void sn_dma_flush(u64 addr)
 					     (common->sfdl_slot - 1));
 		}
 	} else {
-		spin_lock_irqsave((spinlock_t *)&p->sfdl_flush_lock,
-				  flags);
+		spin_lock_irqsave(&p->sfdl_flush_lock, flags);
 		*common->sfdl_flush_addr = 0;
 
 		/* force an interrupt. */
@@ -322,8 +317,7 @@ void sn_dma_flush(u64 addr)
 			cpu_relax();
 
 		/* okay, everything is synched up. */
-		spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock,
-				       flags);
+		spin_unlock_irqrestore(&p->sfdl_flush_lock, flags);
 	}
 	return;
 }
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 2fac27049bf6..98f716bd92f0 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -163,9 +163,12 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	/* Setup the PMU ATE map */
 	soft->pbi_int_ate_resource.lowest_free_index = 0;
 	soft->pbi_int_ate_resource.ate =
-	    kmalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL);
-	memset(soft->pbi_int_ate_resource.ate, 0,
- 	       (soft->pbi_int_ate_size * sizeof(u64)));
+	    kzalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL);
+
+	if (!soft->pbi_int_ate_resource.ate) {
+		kfree(soft);
+		return NULL;
+	}
 
 	if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) {
 		/* TIO PCI Bridge: find nearest node with CPUs */
diff --git a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h
index 01e5b4103235..5335d87ca5f8 100644
--- a/include/asm-ia64/sn/bte.h
+++ b/include/asm-ia64/sn/bte.h
@@ -46,7 +46,7 @@
 #define BTES_PER_NODE (is_shub2() ? 4 : 2)
 #define MAX_BTES_PER_NODE 4
 
-#define BTE2OFF_CTRL	(0)
+#define BTE2OFF_CTRL	0
 #define BTE2OFF_SRC	(SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0)
 #define BTE2OFF_DEST	(SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0)
 #define BTE2OFF_NOTIFY	(SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0)
@@ -75,11 +75,11 @@
 		: base + (BTEOFF_NOTIFY/8))
 
 /* Define hardware modes */
-#define BTE_NOTIFY (IBCT_NOTIFY)
+#define BTE_NOTIFY IBCT_NOTIFY
 #define BTE_NORMAL BTE_NOTIFY
 #define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE)
 /* Use a reserved bit to let the caller specify a wait for any BTE */
-#define BTE_WACQUIRE (0x4000)
+#define BTE_WACQUIRE 0x4000
 /* Use the BTE on the node with the destination memory */
 #define BTE_USE_DEST (BTE_WACQUIRE << 1)
 /* Use any available BTE interface on any node for the transfer */
diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h
index 9334078b089a..a601d3af39b6 100644
--- a/include/asm-ia64/sn/pcibr_provider.h
+++ b/include/asm-ia64/sn/pcibr_provider.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
 #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
@@ -115,18 +115,6 @@ struct pcibus_info {
 	spinlock_t              pbi_lock;
 };
 
-/*
- * pcibus_info structure locking macros
- */
-inline static unsigned long
-pcibr_lock(struct pcibus_info *pcibus_info)
-{
-	unsigned long flag;
-	spin_lock_irqsave(&pcibus_info->pbi_lock, flag);
-	return(flag);
-}
-#define pcibr_unlock(pcibus_info, flag)  spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
-
 extern int  pcibr_init_provider(void);
 extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *);
 extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t);
diff --git a/include/asm-ia64/sn/sn_feature_sets.h b/include/asm-ia64/sn/sn_feature_sets.h
index 9ca642cad338..ff33e3bd3f8e 100644
--- a/include/asm-ia64/sn/sn_feature_sets.h
+++ b/include/asm-ia64/sn/sn_feature_sets.h
@@ -12,9 +12,6 @@
  */
 
 
-#include <asm/types.h>
-#include <asm/bitops.h>
-
 /* --------------------- PROM Features -----------------------------*/
 extern int sn_prom_feature_available(int id);
 
-- 
cgit v1.2.3


From d3454344b3507042e5d561d0cfed19e99cf2fc88 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Mon, 13 Feb 2006 05:32:09 -0500
Subject: [IA64] remove obsolete corporate address

Remove obsolete SGI address

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/sn2/timer_interrupt.c | 7 +------
 drivers/ide/pci/sgiioc4.c                 | 5 -----
 include/asm-ia64/machvec_sn2.h            | 7 +------
 3 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
index adf5db2e2afe..fa7f69945917 100644
--- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -1,7 +1,7 @@
 /*
  *
  *
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2005, 2006 Silicon Graphics, Inc.  All Rights Reserved.
  * 
  * This program is free software; you can redistribute it and/or modify it 
  * under the terms of version 2 of the GNU General Public License 
@@ -22,11 +22,6 @@
  * License along with this program; if not, write the Free Software 
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
  * 
- * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, 
- * Mountain View, CA  94043, or:
- * 
- * http://www.sgi.com 
- * 
  * For further information regarding this notice, see: 
  * 
  * http://oss.sgi.com/projects/GenInfo/NoticeExplan
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 2b286e865163..43b96e298363 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -13,11 +13,6 @@
  * License along with this program; if not, write the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
  *
- * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
  * For further information regarding this notice, see:
  *
  * http://oss.sgi.com/projects/GenInfo/NoticeExplan
diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h
index e1b6cd63f49e..03d00faf03b5 100644
--- a/include/asm-ia64/machvec_sn2.h
+++ b/include/asm-ia64/machvec_sn2.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc.  All Rights Reserved.
  * 
  * This program is free software; you can redistribute it and/or modify it 
  * under the terms of version 2 of the GNU General Public License 
@@ -20,11 +20,6 @@
  * License along with this program; if not, write the Free Software 
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
  * 
- * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, 
- * Mountain View, CA  94043, or:
- * 
- * http://www.sgi.com 
- * 
  * For further information regarding this notice, see: 
  * 
  * http://oss.sgi.com/projects/GenInfo/NoticeExplan
-- 
cgit v1.2.3


From c2a4969ba14e852bf4ee92c7db3b0cf82405a0c9 Mon Sep 17 00:00:00 2001
From: Dean Roe <roe@sgi.com>
Date: Tue, 14 Feb 2006 15:01:23 -0600
Subject: [IA64-SGI] fix the size of __sn_cnodeid_to_nasid

The __sn_cnodeid_to_nasid array was incorrectly sized at MAX_NUMNODES.
On a large system, this array could overflow.  This patch corrects the
problem by sizing the array at MAX_COMPACT_NODES instead.

Signed-off-by: Dean Roe <roe@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/setup.c | 2 +-
 include/asm-ia64/sn/arch.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 1672ecad77e2..5b84836c2171 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
 EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
 
-DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
 
 DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h
index 1a3831c04af6..91c31be87b13 100644
--- a/include/asm-ia64/sn/arch.h
+++ b/include/asm-ia64/sn/arch.h
@@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
  * Compact node ID to nasid mappings kept in the per-cpu data areas of each
  * cpu.
  */
-DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
+DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 #define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
 
 
-- 
cgit v1.2.3


From 4c2cd96696ae0896ce4bcf725b9f0eaffafeb640 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Wed, 15 Feb 2006 08:02:21 -0600
Subject: [IA64-SGI] enforce proper ordering of callouts by XPC

Fix XPC so that it does not deliver any messages until the connected
callout has returned, and so that the disconnected callout cannot occur
before the disconnecting callout has returned.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc_channel.c |  8 +++++---
 arch/ia64/sn/kernel/xpc_main.c    | 20 +++++++++++++-------
 include/asm-ia64/sn/xpc.h         | 31 ++++++++++++++++++-------------
 3 files changed, 36 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 36e5437a0fb6..cdf6856ce089 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -738,7 +738,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	/* make sure all activity has settled down first */
 
-	if (atomic_read(&ch->references) > 0) {
+	if (atomic_read(&ch->references) > 0 ||
+			((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
 		return;
 	}
 	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
@@ -775,7 +777,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	/* both sides are disconnected now */
 
-	if (ch->flags & XPC_C_CONNECTCALLOUT) {
+	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
 		spin_unlock_irqrestore(&ch->lock, *irq_flags);
 		xpc_disconnect_callout(ch, xpcDisconnected);
 		spin_lock_irqsave(&ch->lock, *irq_flags);
@@ -1300,7 +1302,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
 				"delivered=%d, partid=%d, channel=%d\n",
 				nmsgs_sent, ch->partid, ch->number);
 
-			if (ch->flags & XPC_C_CONNECTCALLOUT) {
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
 				xpc_activate_kthreads(ch, nmsgs_sent);
 			}
 		}
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 9cd460dfe27e..8cbf16432570 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -750,12 +750,16 @@ xpc_daemonize_kthread(void *args)
 		/* let registerer know that connection has been established */
 
 		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
-			ch->flags |= XPC_C_CONNECTCALLOUT;
+		if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+			ch->flags |= XPC_C_CONNECTEDCALLOUT;
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 
 			xpc_connected_callout(ch);
 
+			spin_lock_irqsave(&ch->lock, irq_flags);
+			ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
 			/*
 			 * It is possible that while the callout was being
 			 * made that the remote partition sent some messages.
@@ -777,15 +781,17 @@ xpc_daemonize_kthread(void *args)
 
 	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
 		spin_lock_irqsave(&ch->lock, irq_flags);
-		if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
-				!(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
-			ch->flags |= XPC_C_DISCONNECTCALLOUT;
+		if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+				!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+			ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 
 			xpc_disconnect_callout(ch, xpcDisconnecting);
-		} else {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			spin_lock_irqsave(&ch->lock, irq_flags);
+			ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
 		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
 		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
 			xpc_mark_partition_disengaged(part);
 			xpc_IPI_send_disengage(part);
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h
index 0c36928ffd8b..df7f5f4f3cde 100644
--- a/include/asm-ia64/sn/xpc.h
+++ b/include/asm-ia64/sn/xpc.h
@@ -508,19 +508,24 @@ struct xpc_channel {
 #define	XPC_C_OPENREQUEST	0x00000010 /* local open channel request */
 
 #define	XPC_C_SETUP		0x00000020 /* channel's msgqueues are alloc'd */
-#define	XPC_C_CONNECTCALLOUT	0x00000040 /* channel connected callout made */
-#define	XPC_C_CONNECTED		0x00000080 /* local channel is connected */
-#define	XPC_C_CONNECTING	0x00000100 /* channel is being connected */
-
-#define	XPC_C_RCLOSEREPLY	0x00000200 /* remote close channel reply */
-#define	XPC_C_CLOSEREPLY	0x00000400 /* local close channel reply */
-#define	XPC_C_RCLOSEREQUEST	0x00000800 /* remote close channel request */
-#define	XPC_C_CLOSEREQUEST	0x00001000 /* local close channel request */
-
-#define	XPC_C_DISCONNECTED	0x00002000 /* channel is disconnected */
-#define	XPC_C_DISCONNECTING	0x00004000 /* channel is being disconnected */
-#define	XPC_C_DISCONNECTCALLOUT	0x00008000 /* chan disconnected callout made */
-#define	XPC_C_WDISCONNECT	0x00010000 /* waiting for channel disconnect */
+#define	XPC_C_CONNECTEDCALLOUT	0x00000040 /* connected callout initiated */
+#define	XPC_C_CONNECTEDCALLOUT_MADE \
+				0x00000080 /* connected callout completed */
+#define	XPC_C_CONNECTED		0x00000100 /* local channel is connected */
+#define	XPC_C_CONNECTING	0x00000200 /* channel is being connected */
+
+#define	XPC_C_RCLOSEREPLY	0x00000400 /* remote close channel reply */
+#define	XPC_C_CLOSEREPLY	0x00000800 /* local close channel reply */
+#define	XPC_C_RCLOSEREQUEST	0x00001000 /* remote close channel request */
+#define	XPC_C_CLOSEREQUEST	0x00002000 /* local close channel request */
+
+#define	XPC_C_DISCONNECTED	0x00004000 /* channel is disconnected */
+#define	XPC_C_DISCONNECTING	0x00008000 /* channel is being disconnected */
+#define	XPC_C_DISCONNECTINGCALLOUT \
+				0x00010000 /* disconnecting callout initiated */
+#define	XPC_C_DISCONNECTINGCALLOUT_MADE \
+				0x00020000 /* disconnecting callout completed */
+#define	XPC_C_WDISCONNECT	0x00040000 /* waiting for channel disconnect */
 
 
-- 
cgit v1.2.3


From defbb2c929cbe89dc92239b303cd33d3c85e9a83 Mon Sep 17 00:00:00 2001
From: "hawkes@sgi.com" <hawkes@sgi.com>
Date: Tue, 14 Feb 2006 10:40:17 -0800
Subject: [IA64] ia64: simplify and fix udelay()

The original ia64 udelay() was simple, but flawed for platforms without
synchronized ITCs:  a preemption and migration to another CPU during the
while-loop likely resulted in too-early termination or very, very
lengthy looping.

The first fix (now in 2.6.15) broke the delay loop into smaller,
non-preemptible chunks, reenabling preemption between the chunks.  This
fix is flawed in that the total udelay is computed to be the sum of just
the non-preemptible while-loop pieces, i.e., not counting the time spent
in the interim preemptible periods.  If an interrupt or a migration
occurs during one of these interim periods, then that time is invisible
and only serves to lengthen the effective udelay().

This new fix backs out the current flawed fix and returns to a simple
udelay(), fully preemptible and interruptible.  It implements two simple
alternative udelay() routines:  one a default generic version that uses
ia64_get_itc(), and the other an sn-specific version that uses that
platform's RTC.

Signed-off-by: John Hawkes <hawkes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/time.c         | 39 +++++++++++++++++----------------------
 arch/ia64/sn/kernel/sn2/timer.c | 19 +++++++++++++++++++
 include/asm-ia64/timex.h        |  2 ++
 3 files changed, 38 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index a094ec49ccfa..307d01e15b2e 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -250,32 +250,27 @@ time_init (void)
 	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
 }
 
-#define SMALLUSECS 100
-
-void
-udelay (unsigned long usecs)
+/*
+ * Generic udelay assumes that if preemption is allowed and the thread
+ * migrates to another CPU, that the ITC values are synchronized across
+ * all CPUs.
+ */
+static void
+ia64_itc_udelay (unsigned long usecs)
 {
-	unsigned long start;
-	unsigned long cycles;
-	unsigned long smallusecs;
+	unsigned long start = ia64_get_itc();
+	unsigned long end = start + usecs*local_cpu_data->cyc_per_usec;
 
-	/*
-	 * Execute the non-preemptible delay loop (because the ITC might
-	 * not be synchronized between CPUS) in relatively short time
-	 * chunks, allowing preemption between the chunks.
-	 */
-	while (usecs > 0) {
-		smallusecs = (usecs > SMALLUSECS) ? SMALLUSECS : usecs;
-		preempt_disable();
-		cycles = smallusecs*local_cpu_data->cyc_per_usec;
-		start = ia64_get_itc();
+	while (time_before(ia64_get_itc(), end))
+		cpu_relax();
+}
 
-		while (ia64_get_itc() - start < cycles)
-			cpu_relax();
+void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay;
 
-		preempt_enable();
-		usecs -= smallusecs;
-	}
+void
+udelay (unsigned long usecs)
+{
+	(*ia64_udelay)(usecs);
 }
 EXPORT_SYMBOL(udelay);
 
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index deb9baf4d473..56a88b6df4b4 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -14,6 +14,7 @@
 
 #include <asm/hw_irq.h>
 #include <asm/system.h>
+#include <asm/timex.h>
 
 #include <asm/sn/leds.h>
 #include <asm/sn/shub_mmr.h>
@@ -28,9 +29,27 @@ static struct time_interpolator sn2_interpolator = {
 	.source = TIME_SOURCE_MMIO64
 };
 
+/*
+ * sn udelay uses the RTC instead of the ITC because the ITC is not
+ * synchronized across all CPUs, and the thread may migrate to another CPU
+ * if preemption is enabled.
+ */
+static void
+ia64_sn_udelay (unsigned long usecs)
+{
+	unsigned long start = rtc_time();
+	unsigned long end = start +
+			usecs * sn_rtc_cycles_per_second / 1000000;
+
+	while (time_before((unsigned long)rtc_time(), end))
+		cpu_relax();
+}
+
 void __init sn_timer_init(void)
 {
 	sn2_interpolator.frequency = sn_rtc_cycles_per_second;
 	sn2_interpolator.addr = RTC_COUNTER_ADDR;
 	register_time_interpolator(&sn2_interpolator);
+
+	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/include/asm-ia64/timex.h b/include/asm-ia64/timex.h
index 414aae060440..05a6baf8a472 100644
--- a/include/asm-ia64/timex.h
+++ b/include/asm-ia64/timex.h
@@ -15,6 +15,8 @@
 
 typedef unsigned long cycles_t;
 
+extern void (*ia64_udelay)(unsigned long usecs);
+
 /*
  * For performance reasons, we don't want to define CLOCK_TICK_TRATE as
  * local_cpu_data->itc_rate.  Fortunately, we don't have to, either: according to George
-- 
cgit v1.2.3


From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 15:10:22 -0800
Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish

When a packet matching an IPsec policy is SNATed so it doesn't match any
policy anymore, it loses its xfrm bundle, which makes xfrm4_output_finish
crash because of a NULL pointer dereference.

This patch directs these packets to the original output path instead. Since
the packets have already passed the POST_ROUTING hook, but need to start at
the beginning of the original output path which includes another
POST_ROUTING invocation, a flag is added to the IPCB to indicate that the
packet was rerouted and doesn't need to pass the POST_ROUTING hook again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 19 +++++++++++++++----
 include/net/ip.h          |  1 +
 include/net/xfrm.h        |  1 -
 net/ipv4/ip_gre.c         |  3 ++-
 net/ipv4/ip_output.c      | 16 ++++++++++------
 net/ipv4/ipip.c           |  3 ++-
 net/ipv4/xfrm4_output.c   | 13 ++++++++++---
 7 files changed, 40 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 4cf6088625c1..3ca3d9ee78a9 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook,
 				 struct sk_buff **pskb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh)
+				 int (*okfn)(struct sk_buff *), int thresh,
+				 int cond)
 {
+	if (!cond)
+		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
 	if (list_empty(&nf_hooks[pf][hook]))
 		return 1;
@@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN);
+	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1);
 }
                    
 /* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 
 #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
+	__ret = (okfn)(skb);						       \
+__ret;})
+
+#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
+({int __ret;								       \
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
@@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter;
 
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
 static inline int nf_hook_thresh(int pf, unsigned int hook,
 				 struct sk_buff **pskb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh)
+				 int (*okfn)(struct sk_buff *), int thresh,
+				 int cond)
 {
 	return okfn(*pskb);
 }
diff --git a/include/net/ip.h b/include/net/ip.h
index 8de0697b364c..fab3d5b3ab1c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -41,6 +41,7 @@ struct inet_skb_parm
 #define IPSKB_XFRM_TUNNEL_SIZE	2
 #define IPSKB_XFRM_TRANSFORMED	4
 #define IPSKB_FRAG_COMPLETE	8
+#define IPSKB_REROUTED		16
 };
 
 struct ipcm_cookie
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d09ca0e7d139..d6111a2f0a23 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -866,7 +866,6 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu);
 extern int xfrm_init_state(struct xfrm_state *x);
 extern int xfrm4_rcv(struct sk_buff *skb);
 extern int xfrm4_output(struct sk_buff *skb);
-extern int xfrm4_output_finish(struct sk_buff *skb);
 extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
 extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index abe23923e4e7..9981dcd68f11 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -830,7 +830,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, gre_hlen);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3324fbfe528a..57d290d89ec2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -207,8 +207,10 @@ static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
-	if (skb->dst->xfrm != NULL)
-		return xfrm4_output_finish(skb);
+	if (skb->dst->xfrm != NULL) {
+		IPCB(skb)->flags |= IPSKB_REROUTED;
+		return dst_output(skb);
+	}
 #endif
 	if (skb->len > dst_mtu(skb->dst) &&
 	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
@@ -271,8 +273,9 @@ int ip_mc_output(struct sk_buff *skb)
 				newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
-		       ip_finish_output);
+	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+			    ip_finish_output,
+			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
 int ip_output(struct sk_buff *skb)
@@ -284,8 +287,9 @@ int ip_output(struct sk_buff *skb)
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
-		       ip_finish_output);
+	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+		            ip_finish_output,
+			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e5cbe72c6b80..03d13742a4b8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -622,7 +622,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d4df0ddd424b..32ad229b4fed 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -152,10 +152,16 @@ error_nolock:
 	goto out_exit;
 }
 
-int xfrm4_output_finish(struct sk_buff *skb)
+static int xfrm4_output_finish(struct sk_buff *skb)
 {
 	int err;
 
+#ifdef CONFIG_NETFILTER
+	if (!skb->dst->xfrm) {
+		IPCB(skb)->flags |= IPSKB_REROUTED;
+		return dst_output(skb);
+	}
+#endif
 	while (likely((err = xfrm4_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
@@ -178,6 +184,7 @@ int xfrm4_output_finish(struct sk_buff *skb)
 
 int xfrm4_output(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
-		       xfrm4_output_finish);
+	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+			    xfrm4_output_finish,
+			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
-- 
cgit v1.2.3


From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 15:18:19 -0800
Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of
 nf_hook()

nf_hook() is supposed to call the netfilter hook and return control of the
packet back to the caller in case it may pass; the okfn is only used for
queueing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 3ca3d9ee78a9..468896939843 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return okfn(*pskb);
+	return 1;
 }
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 struct flowi;
-- 
cgit v1.2.3


From 9f672004ab1a8094bec1785b39ac683ab9eebebc Mon Sep 17 00:00:00 2001
From: Christian Trefzer <ctrefzer@gmx.de>
Date: Wed, 15 Feb 2006 15:17:34 -0800
Subject: [PATCH] neofb: avoid resetting display config on unblank (v2)

There were two mistakes in the register-read-on-(un)blank approach.

- First, without proper register (un)locking the value read back will always
  be zero, and this is what I missed entirely until just now.  Due to this,
  the logic could not be verified at all and I tried some bogus checks which
  are completely stupid.

- Second, the LCD status bit will always be set to zero when the backlight
  has been turned off.  Reading the value back during unblank will disable the
  LCD unconditionally, regardless of the state it is supposed to be in, since
  we set it to zero beforehand.

So this is what we do now:

- create a new variable in struct neofb_par, and use that to determine
  whether to read back registers (initialized to true)

- before actually blanking the screen, read back the register to sense any
  possible change made through Fn key combo

- use proper neoUnlock() / neoLock() to actually read something

- every call to neofb_blank() determines if we read back next time: blanking
  disables readback, unblanking (FB_BLANK_UNBLANK) enables it

This should give us a nice and clean state machine.  Has been thoroughly
tested on a Dell Latitude CPiA / NM220 Chip docked to a C/Dock2 with attached
CRT in all possible combinations of LCD/CRT on/off.  I changed the config via
Fn key, let the console blank, unblanked by keypress - works flawlessly.

Signed-off-by: Christian Trefzer <ctrefzer@gmx.de>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/neofb.c    | 15 ++++++++++++---
 include/video/neomagic.h |  1 +
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index b85e2b180a44..a2e201dc40f7 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -843,6 +843,9 @@ static int neofb_set_par(struct fb_info *info)
 
 	par->SysIfaceCntl2 = 0xc0;	/* VESA Bios sets this to 0x80! */
 
+	/* Initialize: by default, we want display config register to be read */
+	par->PanelDispCntlRegRead = 1;
+
 	/* Enable any user specified display devices. */
 	par->PanelDispCntlReg1 = 0x00;
 	if (par->internal_display)
@@ -1334,11 +1337,17 @@ static int neofb_blank(int blank_mode, struct fb_info *info)
 	struct neofb_par *par = info->par;
 	int seqflags, lcdflags, dpmsflags, reg;
 
+
 	/*
-	 * Reload the value stored in the register, might have been changed via
-	 * FN keystroke
+	 * Reload the value stored in the register, if sensible. It might have
+	 * been changed via FN keystroke.
 	 */
-	par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03;
+	if (par->PanelDispCntlRegRead) {
+		neoUnlock();
+		par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03;
+		neoLock(&par->state);
+	}
+	par->PanelDispCntlRegRead = !blank_mode;
 
 	switch (blank_mode) {
 	case FB_BLANK_POWERDOWN:	/* powerdown - both sync lines down */
diff --git a/include/video/neomagic.h b/include/video/neomagic.h
index 1d69049bd4c1..78b1f15a538f 100644
--- a/include/video/neomagic.h
+++ b/include/video/neomagic.h
@@ -159,6 +159,7 @@ struct neofb_par {
 	unsigned char PanelDispCntlReg1;
 	unsigned char PanelDispCntlReg2;
 	unsigned char PanelDispCntlReg3;
+	unsigned char PanelDispCntlRegRead;
 	unsigned char PanelVertCenterReg1;
 	unsigned char PanelVertCenterReg2;
 	unsigned char PanelVertCenterReg3;
-- 
cgit v1.2.3


From 5f6164f3092832e0d9b12eed52e09a76bf39c64a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Wed, 15 Feb 2006 15:17:39 -0800
Subject: [PATCH] add asm-generic/mman.h

Make new MADV_REMOVE, MADV_DONTFORK, MADV_DOFORK consistent across all
arches.  The idea is to make it possible to use them portably even before
distros include them in libc headers.

Move common flags to asm-generic/mman.h

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-alpha/mman.h   |  8 +++++---
 include/asm-arm/mman.h     | 31 +------------------------------
 include/asm-arm26/mman.h   | 31 +------------------------------
 include/asm-cris/mman.h    | 31 +------------------------------
 include/asm-frv/mman.h     | 31 +------------------------------
 include/asm-generic/mman.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/asm-h8300/mman.h   | 31 +------------------------------
 include/asm-i386/mman.h    | 31 +------------------------------
 include/asm-ia64/mman.h    | 31 +------------------------------
 include/asm-m32r/mman.h    | 33 ++-------------------------------
 include/asm-m68k/mman.h    | 31 +------------------------------
 include/asm-mips/mman.h    | 22 ++++++++++++----------
 include/asm-parisc/mman.h  |  8 +++++---
 include/asm-powerpc/mman.h | 32 ++------------------------------
 include/asm-s390/mman.h    | 31 +------------------------------
 include/asm-sh/mman.h      | 31 +------------------------------
 include/asm-sparc/mman.h   | 31 ++-----------------------------
 include/asm-sparc64/mman.h | 31 ++-----------------------------
 include/asm-v850/mman.h    | 30 +-----------------------------
 include/asm-x86_64/mman.h  | 30 +-----------------------------
 include/asm-xtensa/mman.h  | 22 ++++++++++++----------
 21 files changed, 96 insertions(+), 503 deletions(-)
 create mode 100644 include/asm-generic/mman.h

(limited to 'include')

diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h
index a21515c16a43..5f24c755f577 100644
--- a/include/asm-alpha/mman.h
+++ b/include/asm-alpha/mman.h
@@ -42,9 +42,11 @@
 #define MADV_WILLNEED	3		/* will need these pages */
 #define	MADV_SPACEAVAIL	5		/* ensure resources are available */
 #define MADV_DONTNEED	6		/* don't need these pages */
-#define MADV_REMOVE	7		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
+
+/* common/generic parameters */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h
index 693ed859e632..54570d2e95b7 100644
--- a/include/asm-arm/mman.h
+++ b/include/asm-arm/mman.h
@@ -1,19 +1,7 @@
 #ifndef __ARM_MMAN_H__
 #define __ARM_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,24 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __ARM_MMAN_H__ */
diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h
index 2096c50df888..4000a6c1b76b 100644
--- a/include/asm-arm26/mman.h
+++ b/include/asm-arm26/mman.h
@@ -1,19 +1,7 @@
 #ifndef __ARM_MMAN_H__
 #define __ARM_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,24 +11,7 @@
 #define MAP_POPULATE    0x8000          /* populate (prefault) page tables */
 #define MAP_NONBLOCK    0x10000         /* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __ARM_MMAN_H__ */
diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h
index deddfb239ff5..1c35e1b66b46 100644
--- a/include/asm-cris/mman.h
+++ b/include/asm-cris/mman.h
@@ -3,19 +3,7 @@
 
 /* verbatim copy of asm-i386/ version */
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -25,24 +13,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __CRIS_MMAN_H__ */
diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h
index d3bca306da82..b4371e928683 100644
--- a/include/asm-frv/mman.h
+++ b/include/asm-frv/mman.h
@@ -1,19 +1,7 @@
 #ifndef __ASM_MMAN_H__
 #define __ASM_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,25 +11,8 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __ASM_MMAN_H__ */
 
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
new file mode 100644
index 000000000000..3b41d2bb70da
--- /dev/null
+++ b/include/asm-generic/mman.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_GENERIC_MMAN_H
+#define _ASM_GENERIC_MMAN_H
+
+/*
+ * Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ * Based on: asm-xxx/mman.h
+ */
+
+#define PROT_READ	0x1		/* page can be read */
+#define PROT_WRITE	0x2		/* page can be written */
+#define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8		/* page may be used for atomic ops */
+#define PROT_NONE	0x0		/* page can not be accessed */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_TYPE	0x0f		/* Mask for type of mapping */
+#define MAP_FIXED	0x10		/* Interpret addr exactly */
+#define MAP_ANONYMOUS	0x20		/* don't use a file */
+
+#define MS_ASYNC	1		/* sync memory asynchronously */
+#define MS_INVALIDATE	2		/* invalidate the caches */
+#define MS_SYNC		4		/* synchronous memory sync */
+
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+
+/* compatibility flags */
+#define MAP_ANON	MAP_ANONYMOUS
+#define MAP_FILE	0
+
+#endif /* _ASM_GENERIC_MMAN_H */
diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h
index ac0346f7d11d..b9f104f22a36 100644
--- a/include/asm-h8300/mman.h
+++ b/include/asm-h8300/mman.h
@@ -1,19 +1,7 @@
 #ifndef __H8300_MMAN_H__
 #define __H8300_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,24 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __H8300_MMAN_H__ */
diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h
index ab2339a1d807..8fd9d7ab7faf 100644
--- a/include/asm-i386/mman.h
+++ b/include/asm-i386/mman.h
@@ -1,19 +1,7 @@
 #ifndef __I386_MMAN_H__
 #define __I386_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,24 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __I386_MMAN_H__ */
diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h
index 357ebb780cc0..6ba179f12718 100644
--- a/include/asm-ia64/mman.h
+++ b/include/asm-ia64/mman.h
@@ -8,19 +8,7 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x00100		/* stack-like segment */
 #define MAP_GROWSUP	0x00200		/* register stack-like segment */
@@ -31,24 +19,7 @@
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* _ASM_IA64_MMAN_H */
diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h
index 6b02fe3fcff2..695a860c024f 100644
--- a/include/asm-m32r/mman.h
+++ b/include/asm-m32r/mman.h
@@ -1,21 +1,9 @@
 #ifndef __M32R_MMAN_H__
 #define __M32R_MMAN_H__
 
-/* orig : i386 2.6.0-test6 */
-
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+#include <asm-generic/mman.h>
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+/* orig : i386 2.6.0-test6 */
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -25,24 +13,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __M32R_MMAN_H__ */
diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h
index efd12bc4ccb7..1626d37f4898 100644
--- a/include/asm-m68k/mman.h
+++ b/include/asm-m68k/mman.h
@@ -1,19 +1,7 @@
 #ifndef __M68K_MMAN_H__
 #define __M68K_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,24 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __M68K_MMAN_H__ */
diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h
index 6d01e26830fa..046cf686bee7 100644
--- a/include/asm-mips/mman.h
+++ b/include/asm-mips/mman.h
@@ -60,17 +60,19 @@
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
 
 /* compatibility flags */
-#define MAP_ANON       MAP_ANONYMOUS
-#define MAP_FILE       0
+#define MAP_ANON	MAP_ANONYMOUS
+#define MAP_FILE	0
 
 #endif /* _ASM_MMAN_H */
diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h
index a381cf5c8f55..0ef15ee0f17e 100644
--- a/include/asm-parisc/mman.h
+++ b/include/asm-parisc/mman.h
@@ -38,7 +38,11 @@
 #define MADV_SPACEAVAIL 5               /* insure that resources are reserved */
 #define MADV_VPS_PURGE  6               /* Purge pages from VM page cache */
 #define MADV_VPS_INHERIT 7              /* Inherit parents page size */
-#define MADV_REMOVE     8		/* remove these pages & resources */
+
+/* common/generic parameters: try to keep these consistent across architectures */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
 
 /* The range 12-64 is reserved for page size specification. */
 #define MADV_4K_PAGES   12              /* Use 4K pages  */
@@ -49,8 +53,6 @@
 #define MADV_4M_PAGES   22              /* Use 4 Megabyte pages */
 #define MADV_16M_PAGES  24              /* Use 16 Megabyte pages */
 #define MADV_64M_PAGES  26              /* Use 64 Megabyte pages */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h
index fcff25d13f13..24cf664a8295 100644
--- a/include/asm-powerpc/mman.h
+++ b/include/asm-powerpc/mman.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_POWERPC_MMAN_H
 #define _ASM_POWERPC_MMAN_H
 
+#include <asm-generic/mman.h>
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -8,19 +10,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
 #define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
 #define MAP_NORESERVE   0x40            /* don't reserve swap pages */
 #define MAP_LOCKED	0x80
@@ -29,27 +18,10 @@
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
 #define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
 #define MCL_FUTURE      0x4000          /* lock all additions to address space */
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif	/* _ASM_POWERPC_MMAN_H */
diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h
index d41ca1477010..7839767d837e 100644
--- a/include/asm-s390/mman.h
+++ b/include/asm-s390/mman.h
@@ -9,19 +9,7 @@
 #ifndef __S390_MMAN_H__
 #define __S390_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -31,24 +19,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL    0x0              /* default page-in behavior */
-#define MADV_RANDOM    0x1              /* page-in minimum required */
-#define MADV_SEQUENTIAL        0x2             /* read-ahead aggressively */
-#define MADV_WILLNEED  0x3              /* pre-fault pages */
-#define MADV_DONTNEED  0x4              /* discard these pages */
-#define MADV_REMOVE    0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __S390_MMAN_H__ */
diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h
index 0e08d0573abc..156eb0225cf6 100644
--- a/include/asm-sh/mman.h
+++ b/include/asm-sh/mman.h
@@ -1,19 +1,7 @@
 #ifndef __ASM_SH_MMAN_H
 #define __ASM_SH_MMAN_H
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -23,24 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __ASM_SH_MMAN_H */
diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h
index 4a298b2be859..88d1886abf3b 100644
--- a/include/asm-sparc/mman.h
+++ b/include/asm-sparc/mman.h
@@ -2,21 +2,10 @@
 #ifndef __SPARC_MMAN_H__
 #define __SPARC_MMAN_H__
 
-/* SunOS'ified... */
+#include <asm-generic/mman.h>
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+/* SunOS'ified... */
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
 #define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
 #define MAP_NORESERVE   0x40            /* don't reserve swap pages */
 #define MAP_INHERIT     0x80            /* SunOS doesn't do this, but... */
@@ -27,10 +16,6 @@
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
 #define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
 #define MCL_FUTURE      0x4000          /* lock all additions to address space */
 
@@ -48,18 +33,6 @@
 #define MC_LOCKAS       5  /* Lock an entire address space of the calling process */
 #define MC_UNLOCKAS     6  /* Unlock entire address space of calling process */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
-#define MADV_REMOVE	0x6		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
 
 #endif /* __SPARC_MMAN_H__ */
diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h
index d705ec92da8b..6fd878e61435 100644
--- a/include/asm-sparc64/mman.h
+++ b/include/asm-sparc64/mman.h
@@ -2,21 +2,10 @@
 #ifndef __SPARC64_MMAN_H__
 #define __SPARC64_MMAN_H__
 
-/* SunOS'ified... */
+#include <asm-generic/mman.h>
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+/* SunOS'ified... */
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
 #define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
 #define MAP_NORESERVE   0x40            /* don't reserve swap pages */
 #define MAP_INHERIT     0x80            /* SunOS doesn't do this, but... */
@@ -27,10 +16,6 @@
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
 #define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
 #define MCL_FUTURE      0x4000          /* lock all additions to address space */
 
@@ -48,18 +33,6 @@
 #define MC_LOCKAS       5  /* Lock an entire address space of the calling process */
 #define MC_UNLOCKAS     6  /* Unlock entire address space of calling process */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
-#define MADV_REMOVE	0x6		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
 
 #endif /* __SPARC64_MMAN_H__ */
diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h
index 7b851c310e41..edbf6edbfb37 100644
--- a/include/asm-v850/mman.h
+++ b/include/asm-v850/mman.h
@@ -1,18 +1,7 @@
 #ifndef __V850_MMAN_H__
 #define __V850_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
@@ -20,24 +9,7 @@
 #define MAP_LOCKED	0x2000		/* pages are locked */
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif /* __V850_MMAN_H__ */
diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h
index b699a38c1c3c..dd5cb0534d37 100644
--- a/include/asm-x86_64/mman.h
+++ b/include/asm-x86_64/mman.h
@@ -1,19 +1,8 @@
 #ifndef __X8664_MMAN_H__
 #define __X8664_MMAN_H__
 
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_SEM	0x8
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+#include <asm-generic/mman.h>
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
@@ -24,24 +13,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_ANON	MAP_ANONYMOUS
-#define MAP_FILE	0
-
 #endif
diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h
index e2d7afb679c8..ba394cbb4807 100644
--- a/include/asm-xtensa/mman.h
+++ b/include/asm-xtensa/mman.h
@@ -67,17 +67,19 @@
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#define MADV_NORMAL	0x0		/* default page-in behavior */
-#define MADV_RANDOM	0x1		/* page-in minimum required */
-#define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
-#define MADV_WILLNEED	0x3		/* pre-fault pages */
-#define MADV_DONTNEED	0x4		/* discard these pages */
-#define MADV_REMOVE	0x5		/* remove these pages & resources */
-#define MADV_DONTFORK	0x30		/* dont inherit across fork */
-#define MADV_DOFORK	0x31		/* do inherit across fork */
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
 
 /* compatibility flags */
-#define MAP_ANON       MAP_ANONYMOUS
-#define MAP_FILE       0
+#define MAP_ANON	MAP_ANONYMOUS
+#define MAP_FILE	0
 
 #endif /* _XTENSA_MMAN_H */
-- 
cgit v1.2.3


From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Wed, 15 Feb 2006 15:17:40 -0800
Subject: [PATCH] hrtimer: fix multiple macro argument expansion

For two macros the arguments were expanded twice; change them to inline
functions to avoid it.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ktime.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 6aca67a569a2..f3dec45ef874 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
 		({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
 
 /* convert a timespec to ktime_t format: */
-#define timespec_to_ktime(ts)		ktime_set((ts).tv_sec, (ts).tv_nsec)
+static inline ktime_t timespec_to_ktime(struct timespec ts)
+{
+	return ktime_set(ts.tv_sec, ts.tv_nsec);
+}
 
 /* convert a timeval to ktime_t format: */
-#define timeval_to_ktime(tv)		ktime_set((tv).tv_sec, (tv).tv_usec * 1000)
+static inline ktime_t timeval_to_ktime(struct timeval tv)
+{
+	return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
+}
 
 /* Map the ktime_t to timespec conversion to ns_to_timespec function */
 #define ktime_to_timespec(kt)		ns_to_timespec((kt).tv64)
-- 
cgit v1.2.3


From 7bbb79403163e047c6e333ff169db34e3c969e65 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Thu, 16 Feb 2006 11:08:09 +0000
Subject: [ARM] Fix SMP initialisation oops

A change to the SMP initialisation caused the following oops:

 CPU1: Booted secondary processor
 CPU1: D VIPT write-back cache
 CPU1: I cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets
 CPU1: D cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets
 <7>Calibrating delay loop... 83.14 BogoMIPS (lpj=415744)
 <1>Unable to handle kernel NULL pointer dereference at virtual address 0000001c
 ...
 PC is at enqueue_task+0x1c/0x64
 LR is at activate_task+0xcc/0xe4

SMP initialisation now requires cpu_possible_map to be initialised in
setup_arch().  Move this from smp_prepare_cpus() to smp_init_cpus()
and call it from our setup_arch() if CONFIG_SMP is enabled.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c            |  5 +++++
 arch/arm/kernel/smp.c              |  1 -
 arch/arm/mach-integrator/platsmp.c | 21 +++++++++++++++------
 arch/arm/mach-realview/platsmp.c   | 21 +++++++++++++++------
 include/asm-arm/smp.h              |  5 +++++
 5 files changed, 40 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c45d10d07bde..68273b4dc882 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -23,6 +23,7 @@
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 
 #include <asm/cpu.h>
 #include <asm/elf.h>
@@ -771,6 +772,10 @@ void __init setup_arch(char **cmdline_p)
 	paging_init(&meminfo, mdesc);
 	request_standard_resources(&meminfo, mdesc);
 
+#ifdef CONFIG_SMP
+	smp_init_cpus();
+#endif
+
 	cpu_init();
 
 	/*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 7338948bd7d3..02aa300c4633 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -338,7 +338,6 @@ void __init smp_prepare_boot_cpu(void)
 
 	per_cpu(cpu_data, cpu).idle = current;
 
-	cpu_set(cpu, cpu_possible_map);
 	cpu_set(cpu, cpu_present_map);
 	cpu_set(cpu, cpu_online_map);
 }
diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c
index ea10bd8c972c..1bc8534ef0c6 100644
--- a/arch/arm/mach-integrator/platsmp.c
+++ b/arch/arm/mach-integrator/platsmp.c
@@ -140,6 +140,18 @@ static void __init poke_milo(void)
 	mb();
 }
 
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int i, ncores = get_core_count();
+
+	for (i = 0; i < ncores; i++)
+		cpu_set(i, cpu_possible_map);
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int ncores = get_core_count();
@@ -176,14 +188,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		max_cpus = ncores;
 
 	/*
-	 * Initialise the possible/present maps.
-	 * cpu_possible_map describes the set of CPUs which may be present
-	 * cpu_present_map describes the set of CPUs populated
+	 * Initialise the present map, which describes the set of CPUs
+	 * actually populated at the present time.
 	 */
-	for (i = 0; i < max_cpus; i++) {
-		cpu_set(i, cpu_possible_map);
+	for (i = 0; i < max_cpus; i++)
 		cpu_set(i, cpu_present_map);
-	}
 
 	/*
 	 * Do we need any more CPUs? If so, then let them know where
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index a8fbd76d8be5..b8484e15dacb 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -143,6 +143,18 @@ static void __init poke_milo(void)
 	mb();
 }
 
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int i, ncores = get_core_count();
+
+	for (i = 0; i < ncores; i++)
+		cpu_set(i, cpu_possible_map);
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int ncores = get_core_count();
@@ -179,14 +191,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	local_timer_setup(cpu);
 
 	/*
-	 * Initialise the possible/present maps.
-	 * cpu_possible_map describes the set of CPUs which may be present
-	 * cpu_present_map describes the set of CPUs populated
+	 * Initialise the present map, which describes the set of CPUs
+	 * actually populated at the present time.
 	 */
-	for (i = 0; i < max_cpus; i++) {
-		cpu_set(i, cpu_possible_map);
+	for (i = 0; i < max_cpus; i++)
 		cpu_set(i, cpu_present_map);
-	}
 
 	/*
 	 * Do we need any more CPUs? If so, then let them know where
diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h
index 5a72e50ca9fc..fe45f7f61223 100644
--- a/include/asm-arm/smp.h
+++ b/include/asm-arm/smp.h
@@ -41,6 +41,11 @@ extern void show_ipi_list(struct seq_file *p);
  */
 asmlinkage void do_IPI(struct pt_regs *regs);
 
+/*
+ * Setup the SMP cpu_possible_map
+ */
+extern void smp_init_cpus(void);
+
 /*
  * Move global data into per-processor storage.
  */
-- 
cgit v1.2.3


From d9db950cfa3d674ee834d980c329efdf8e4a0568 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 16 Feb 2006 22:36:15 +0000
Subject: [ARM] 3339/1: ARM EABI: make unmuxed syscalls visible

Patch from Nicolas Pitre

With EABI the multiplexed sys_ipc and sys_socketcall syscalls are
unavailable and their support code is even removed from the compiled
kernel, and the new unmuxed syscalls must be used instead.

Make those syscall numbers visible.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/unistd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index 77430d6178ae..8f331bbd39a8 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -309,7 +309,7 @@
 #define __NR_mq_getsetattr		(__NR_SYSCALL_BASE+279)
 #define __NR_waitid			(__NR_SYSCALL_BASE+280)
 
-#if 0 /* reserve these for un-muxing socketcall */
+#if defined(__ARM_EABI__)  /* reserve these for un-muxing socketcall */
 #define __NR_socket			(__NR_SYSCALL_BASE+281)
 #define __NR_bind			(__NR_SYSCALL_BASE+282)
 #define __NR_connect			(__NR_SYSCALL_BASE+283)
@@ -329,7 +329,7 @@
 #define __NR_recvmsg			(__NR_SYSCALL_BASE+297)
 #endif
 
-#if 0 /* reserve these for un-muxing ipc */
+#if defined(__ARM_EABI__)  /* reserve these for un-muxing ipc */
 #define __NR_semop			(__NR_SYSCALL_BASE+298)
 #define __NR_semget			(__NR_SYSCALL_BASE+299)
 #define __NR_semctl			(__NR_SYSCALL_BASE+300)
@@ -347,7 +347,7 @@
 #define __NR_request_key		(__NR_SYSCALL_BASE+310)
 #define __NR_keyctl			(__NR_SYSCALL_BASE+311)
 
-#if 0 /* reserved for un-muxing ipc */
+#if defined(__ARM_EABI__)  /* reserved for un-muxing ipc */
 #define __NR_semtimedop			(__NR_SYSCALL_BASE+312)
 #endif
 
-- 
cgit v1.2.3


From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 16 Feb 2006 23:41:58 +0100
Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and
 cleanup

AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution
installation it's easiest if it's a boot time option.

Also I moved the variable to a more appropriate place and made
it independent from sysctl.

And marked it __read_mostly, which it is.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  3 +++
 arch/i386/kernel/cpu/transmeta.c    |  1 +
 include/linux/kernel.h              |  6 ------
 include/linux/mm.h                  |  2 ++
 kernel/sysctl.c                     |  2 --
 mm/memory.c                         | 10 ++++++++++
 6 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ac75b57edf2e..b874771385cd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1638,6 +1638,9 @@ running once the system is up.
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
 
+	norandmaps	Don't use address space randomization
+			Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
+
 
 ______________________________________________________________________
 Changelog:
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index bdbeb77f4e22..7214c9b577ab 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b49affa0ac5a..3b507bf05d09 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -326,12 +326,6 @@ struct sysinfo {
 /* Force a compilation error if condition is true */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
-#ifdef CONFIG_SYSCTL
-extern int randomize_va_space;
-#else
-#define randomize_va_space 1
-#endif
-
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75e9f0724997..26e1663a5cbe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 void drop_pagecache(void);
 void drop_slab(void);
 
+extern int randomize_va_space;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f62efec..7654d55c47f5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -126,8 +126,6 @@ extern int sysctl_hz_timer;
 extern int acct_parm[];
 #endif
 
-int randomize_va_space = 1;
-
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
diff --git a/mm/memory.c b/mm/memory.c
index 2bee1f21aa8a..9abc6008544b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(high_memory);
 EXPORT_SYMBOL(vmalloc_earlyreserve);
 
+int randomize_va_space __read_mostly = 1;
+
+static int __init disable_randmaps(char *s)
+{
+	randomize_va_space = 0;
+	return 0;
+}
+__setup("norandmaps", disable_randmaps);
+
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none.  Usually (but
-- 
cgit v1.2.3


From 7fd67843b96f90f59c9a244a1bc25137978a3ff9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 16 Feb 2006 23:42:07 +0100
Subject: [PATCH] x86_64: Disable tsc when apicpmtimer is active

Otherwise it has no effect anyways.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/apic.c  | 1 +
 arch/x86_64/kernel/time.c  | 3 +--
 include/asm-x86_64/proto.h | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 7a0a3e8d5d72..e5b14c57eaa0 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -1152,6 +1152,7 @@ __setup("noapicmaintimer", setup_noapicmaintimer);
 static __init int setup_apicpmtimer(char *s)
 {
 	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
 	return setup_apicmaintimer(NULL);
 }
 __setup("apicpmtimer", setup_apicpmtimer);
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 3c58c30506a1..67841d11ed1f 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -1327,8 +1327,7 @@ static int __init nohpet_setup(char *s)
 
 __setup("nohpet", nohpet_setup);
 
-
-static int __init notsc_setup(char *s)
+int __init notsc_setup(char *s)
 {
 	notsc = 1;
 	return 0;
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index c99832e7bf3f..eca3f2d633db 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -133,6 +133,7 @@ extern int fix_aperture;
 extern int force_iommu;
 
 extern int reboot_force;
+extern int notsc_setup(char *);
 
 extern void smp_local_timer_interrupt(struct pt_regs * regs);
 
-- 
cgit v1.2.3


From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 17 Feb 2006 10:30:23 +1100
Subject: [PATCH] Provide an interface for getting the current tick length

This provides an interface for arch code to find out how many
nanoseconds are going to be added on to xtime by the next call to
do_timer.  The value returned is a fixed-point number in 52.12 format
in nanoseconds.  The reason for this format is that it gives the
full precision that the timekeeping code is using internally.

The motivation for this is to fix a problem that has arisen on 32-bit
powerpc in that the value returned by do_gettimeofday drifts apart
from xtime if NTP is being used.  PowerPC is now using a lockless
do_gettimeofday based on reading the timebase register and performing
some simple arithmetic.  (This method of getting the time is also
exported to userspace via the VDSO.)  However, the factor and offset
it uses were calculated based on the nominal tick length and weren't
being adjusted when NTP varied the tick length.

Note that 64-bit powerpc has had the lockless do_gettimeofday for a
long time now.  It also had an extremely hairy routine that got called
from the 32-bit compat routine for adjtimex, which adjusted the
factor and offset according to what it thought the timekeeping code
was going to do.  Not only was this only called if a 32-bit task did
adjtimex (i.e. not if a 64-bit task did adjtimex), it was also
duplicating computations from kernel/timer.c and it wasn't clear that
it was (still) correct.

The simple solution is to ask the timekeeping code how long the
current jiffy will be on each timer interrupt, after calling
do_timer.  If this jiffy will be a different length from the last one,
we then need to compute new values for the factor and offset used in
the lockless do_gettimeofday.  In this way we can keep xtime and
do_gettimeofday in sync, even when NTP is varying the tick length.

Note that when adjtimex varies the tick length, it almost always
introduces the variation from the next tick on.  The only case I could
see where adjtimex would vary the length of the current tick is when
an old-style adjtime adjustment is being cancelled.  (It's not clear
to me why the adjustment has to be cancelled immediately rather than
from the next tick on.)  Thus I don't see any real need for a hook in
adjtimex; the rare case of an old-style adjustment being cancelled can
be fixed up at the next tick.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: john stultz <johnstul@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/timex.h |  3 +++
 kernel/timer.c        | 39 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 04a4a8cb4ed3..b7ca1204e42a 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -345,6 +345,9 @@ time_interpolator_reset(void)
 
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
+/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
+extern u64 current_tick_length(void);
+
 #endif /* KERNEL */
 
 #endif /* LINUX_TIMEX_H */
diff --git a/kernel/timer.c b/kernel/timer.c
index b9dad3994676..fe3a9a9f8328 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -717,12 +717,16 @@ static void second_overflow(void)
 #endif
 }
 
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+/*
+ * Returns how many microseconds we need to add to xtime this tick
+ * in doing an adjustment requested with adjtime.
+ */
+static long adjtime_adjustment(void)
 {
-	long time_adjust_step, delta_nsec;
+	long time_adjust_step;
 
-	if ((time_adjust_step = time_adjust) != 0 ) {
+	time_adjust_step = time_adjust;
+	if (time_adjust_step) {
 		/*
 		 * We are doing an adjtime thing.  Prepare time_adjust_step to
 		 * be within bounds.  Note that a positive time_adjust means we
@@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void)
 		 */
 		time_adjust_step = min(time_adjust_step, (long)tickadj);
 		time_adjust_step = max(time_adjust_step, (long)-tickadj);
+	}
+	return time_adjust_step;
+}
 
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+	long time_adjust_step, delta_nsec;
+
+	time_adjust_step = adjtime_adjustment();
+	if (time_adjust_step)
 		/* Reduce by this step the amount of time left  */
 		time_adjust -= time_adjust_step;
-	}
 	delta_nsec = tick_nsec + time_adjust_step * 1000;
 	/*
 	 * Advance the phase, once it gets to one microsecond, then
@@ -758,6 +771,22 @@ static void update_wall_time_one_tick(void)
 	}
 }
 
+/*
+ * Return how long ticks are at the moment, that is, how much time
+ * update_wall_time_one_tick will add to xtime next time we call it
+ * (assuming no calls to do_adjtimex in the meantime).
+ * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
+ * bits to the right of the binary point.
+ * This function has no side-effects.
+ */
+u64 current_tick_length(void)
+{
+	long delta_nsec;
+
+	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
+	return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+}
+
 /*
  * Using a loop looks inefficient, but "ticks" is
  * usually just one (we shouldn't be losing ticks,
-- 
cgit v1.2.3


From cfe91f9ce297e23e6fbdf61c02bdd8ab9af7c8a8 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <76306.1226@compuserve.com>
Date: Fri, 17 Feb 2006 03:16:55 -0500
Subject: [PATCH] i386: fix singlestepping through a syscall

Do not mask TIF_SINGLESTEP bit in _TIF_WORK_MASK. Masking this stopped
do_notify_resume() from being called when it should have been.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/thread_info.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index e20e99551d71..1f7d48c9ba3f 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -158,8 +158,8 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
-  (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|\
-		  _TIF_SECCOMP|_TIF_SYSCALL_EMU))
+  (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+		  _TIF_SECCOMP | _TIF_SYSCALL_EMU))
 /* work to do on any return to u-space */
 #define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
 
-- 
cgit v1.2.3


From 255acee706b333b79f593dd366f16e1f107cccc3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:46 -0800
Subject: [PATCH] s390: additional_cpus parameter

Introduce additional_cpus command line option.  By default no additional cpu
can be attached to the system anymore.  Only the cpus present at IPL time can
be switched on/off.  If it is desired that additional cpus can be attached to
the system the maximum number of additional cpus needs to be specified with
this option.

This change is necessary in order to limit the waste of per_cpu data
structures.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cpu-hotplug.txt | 10 ++++----
 arch/s390/kernel/setup.c      |  2 ++
 arch/s390/kernel/smp.c        | 58 +++++++++++++++++++++++++++++--------------
 include/asm-s390/smp.h        |  2 ++
 4 files changed, 49 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index e05278087ffa..4d3355da0e26 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -11,6 +11,8 @@
 			Joel Schopp <jschopp@austin.ibm.com>
 		ia64/x86_64:
 			Ashok Raj <ashok.raj@intel.com>
+		s390:
+			Heiko Carstens <heiko.carstens@de.ibm.com>
 
 Authors: Ashok Raj <ashok.raj@intel.com>
 Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
@@ -44,11 +46,9 @@ maxcpus=n    Restrict boot time cpus to n. Say if you have 4 cpus, using
              maxcpus=2 will only boot 2. You can choose to bring the
              other cpus later online, read FAQ's for more info.
 
-additional_cpus*=n	Use this to limit hotpluggable cpus. This option sets
-			cpu_possible_map = cpu_present_map + additional_cpus
-
-(*) Option valid only for following architectures
-- x86_64, ia64
+additional_cpus=n	[x86_64, s390 only] use this to limit hotpluggable cpus.
+                          This option sets
+  			cpu_possible_map = cpu_present_map + additional_cpus
 
 ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
 to determine the number of potentially hot-pluggable cpus. The implementation
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index de8784267473..24f62f16c0e5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -600,6 +600,7 @@ setup_arch(char **cmdline_p)
 	init_mm.brk = (unsigned long) &_end;
 
 	parse_cmdline_early(cmdline_p);
+	parse_early_param();
 
 	setup_memory();
 	setup_resources();
@@ -607,6 +608,7 @@ setup_arch(char **cmdline_p)
 
         cpu_init();
         __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
+	smp_setup_cpu_possible_map();
 
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0d1ad5dbe2b1..53291e94ac7b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1,8 +1,7 @@
 /*
  *  arch/s390/kernel/smp.c
  *
- *  S390 version
- *    Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Copyright (C) IBM Corp. 1999,2006
  *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
  *               Heiko Carstens (heiko.carstens@de.ibm.com)
@@ -41,8 +40,6 @@
 #include <asm/cpcmd.h>
 #include <asm/tlbflush.h>
 
-/* prototypes */
-
 extern volatile int __cpu_logical_map[];
 
 /*
@@ -51,13 +48,11 @@ extern volatile int __cpu_logical_map[];
 
 struct _lowcore *lowcore_ptr[NR_CPUS];
 
-cpumask_t cpu_online_map;
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
+cpumask_t cpu_online_map = CPU_MASK_NONE;
+cpumask_t cpu_possible_map = CPU_MASK_NONE;
 
 static struct task_struct *current_set[NR_CPUS];
 
-EXPORT_SYMBOL(cpu_online_map);
-
 /*
  * Reboot, halt and power_off routines for SMP.
  */
@@ -490,10 +485,10 @@ void smp_ctl_clear_bit(int cr, int bit) {
  * Lets check how many CPUs we have.
  */
 
-void
-__init smp_check_cpus(unsigned int max_cpus)
+static unsigned int
+__init smp_count_cpus(void)
 {
-	int cpu, num_cpus;
+	unsigned int cpu, num_cpus;
 	__u16 boot_cpu_addr;
 
 	/*
@@ -503,19 +498,20 @@ __init smp_check_cpus(unsigned int max_cpus)
 	boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr;
 	current_thread_info()->cpu = 0;
 	num_cpus = 1;
-	for (cpu = 0; cpu <= 65535 && num_cpus < max_cpus; cpu++) {
+	for (cpu = 0; cpu <= 65535; cpu++) {
 		if ((__u16) cpu == boot_cpu_addr)
 			continue;
-		__cpu_logical_map[num_cpus] = (__u16) cpu;
-		if (signal_processor(num_cpus, sigp_sense) ==
+		__cpu_logical_map[1] = (__u16) cpu;
+		if (signal_processor(1, sigp_sense) ==
 		    sigp_not_operational)
 			continue;
-		cpu_set(num_cpus, cpu_present_map);
 		num_cpus++;
 	}
 
 	printk("Detected %d CPU's\n",(int) num_cpus);
 	printk("Boot cpu address %2X\n", boot_cpu_addr);
+
+	return num_cpus;
 }
 
 /*
@@ -676,6 +672,32 @@ __cpu_up(unsigned int cpu)
 	return 0;
 }
 
+static unsigned int __initdata additional_cpus;
+
+void __init smp_setup_cpu_possible_map(void)
+{
+	unsigned int pcpus, cpu;
+
+	pcpus = smp_count_cpus() + additional_cpus;
+
+	if (pcpus > NR_CPUS)
+		pcpus = NR_CPUS;
+
+	for (cpu = 0; cpu < pcpus; cpu++)
+		cpu_set(cpu, cpu_possible_map);
+
+	cpu_present_map = cpu_possible_map;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int __init setup_additional_cpus(char *s)
+{
+	additional_cpus = simple_strtoul(s, NULL, 0);
+	return 0;
+}
+early_param("additional_cpus", setup_additional_cpus);
+
 int
 __cpu_disable(void)
 {
@@ -744,6 +766,8 @@ cpu_die(void)
 	for(;;);
 }
 
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  *	Cycle through the processors and setup structures.
  */
@@ -757,7 +781,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
         /* request the 0x1201 emergency signal external interrupt */
         if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
                 panic("Couldn't request external interrupt 0x1201");
-        smp_check_cpus(max_cpus);
         memset(lowcore_ptr,0,sizeof(lowcore_ptr));  
         /*
          *  Initialize prefix pages and stacks for all possible cpus
@@ -806,14 +829,12 @@ void __devinit smp_prepare_boot_cpu(void)
 	BUG_ON(smp_processor_id() != 0);
 
 	cpu_set(0, cpu_online_map);
-	cpu_set(0, cpu_present_map);
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	current_set[0] = current;
 }
 
 void smp_cpus_done(unsigned int max_cpus)
 {
-	cpu_present_map = cpu_possible_map;
 }
 
 /*
@@ -845,6 +866,7 @@ static int __init topology_init(void)
 
 subsys_initcall(topology_init);
 
+EXPORT_SYMBOL(cpu_online_map);
 EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(lowcore_ptr);
 EXPORT_SYMBOL(smp_ctl_set_bit);
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 9c6e9c300eb9..444dae5912e6 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -31,6 +31,7 @@ typedef struct
 	__u16      cpu;
 } sigp_info;
 
+extern void smp_setup_cpu_possible_map(void);
 extern int smp_call_function_on(void (*func) (void *info), void *info,
 				int nonatomic, int wait, int cpu);
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
@@ -104,6 +105,7 @@ smp_call_function_on(void (*func) (void *info), void *info,
 #define smp_cpu_not_running(cpu)	1
 #define smp_get_cpu(cpu) ({ 0; })
 #define smp_put_cpu(cpu) ({ 0; })
+#define smp_setup_cpu_possible_map()
 #endif
 
 #endif
-- 
cgit v1.2.3


From 200a4552af34b9a32e1f68a881a9ed5c7ec699cc Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Fri, 17 Feb 2006 13:52:56 -0800
Subject: [PATCH] powerpc: Fix accidentally-working typo in __pud_free_tlb

One of the parameters to the __pud_free_tlb() macro for powerpc is
incorrect (see patch).  We get away with it by accident, because in the one
place the macro is called, the second parameter is a variable named "pud".

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-powerpc/pgalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h
index 9f5b052784a5..a00ee002cd11 100644
--- a/include/asm-powerpc/pgalloc.h
+++ b/include/asm-powerpc/pgalloc.h
@@ -146,7 +146,7 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
 	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
 		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
 #ifndef CONFIG_PPC_64K_PAGES
-#define __pud_free_tlb(tlb, pmd)	\
+#define __pud_free_tlb(tlb, pud)	\
 	pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
 		PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
 #endif /* CONFIG_PPC_64K_PAGES */
-- 
cgit v1.2.3