From 7b7232f3fb5ecd7c30cb52df368070cc5f5ca614 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Feb 2006 21:06:49 -0600 Subject: [SCSI] iscsi update: cleanup iscsi class interface From: michaelc@cs.wisc.edu fujita.tomonori@lab.ntt.co.jp da-x@monatomic.org and err path fixup from: ogerlitz@voltaire.com This patch cleans up that interface by having the lld and class pass a iscsi_cls_session or iscsi_cls_conn between each other when the function is used by HW and SW iscsi llds. This way the lld does not have to remember if it has to send a handle or pointer and a handle or pointer to connection, session or host. This also has the class verify the session handle that gets passed from userspace instead of using the pointer passed into the kernel directly. Signed-off-by: Mike Christie Signed-off-by: Alex Aizman Signed-off-by: Dmitry Yusupov Signed-off-by: James Bottomley --- include/scsi/iscsi_if.h | 3 --- include/scsi/scsi_transport_iscsi.h | 34 ++++++++++++++++++---------------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 3e5cb5ab2d34..e5618b90996e 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -163,9 +163,6 @@ enum iscsi_param { }; #define ISCSI_PARAM_MAX 14 -typedef uint64_t iscsi_sessionh_t; /* iSCSI Data-Path session handle */ -typedef uint64_t iscsi_connh_t; /* iSCSI Data-Path connection handle */ - #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle) #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr) #define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata)) diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 16602a547a63..b41cf077e54b 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -63,25 +63,28 @@ struct iscsi_transport { int max_lun; unsigned int max_conn; unsigned int max_cmd_len; - struct Scsi_Host *(*create_session) (struct scsi_transport_template *t, - uint32_t initial_cmdsn); - void (*destroy_session) (struct Scsi_Host *shost); - struct iscsi_cls_conn *(*create_conn) (struct Scsi_Host *shost, + struct iscsi_cls_session *(*create_session) + (struct scsi_transport_template *t, uint32_t sn, uint32_t *sid); + void (*destroy_session) (struct iscsi_cls_session *session); + struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess, uint32_t cid); - int (*bind_conn) (iscsi_sessionh_t session, iscsi_connh_t conn, + int (*bind_conn) (struct iscsi_cls_session *session, + struct iscsi_cls_conn *cls_conn, uint32_t transport_fd, int is_leading); - int (*start_conn) (iscsi_connh_t conn); - void (*stop_conn) (iscsi_connh_t conn, int flag); + int (*start_conn) (struct iscsi_cls_conn *conn); + void (*stop_conn) (struct iscsi_cls_conn *conn, int flag); void (*destroy_conn) (struct iscsi_cls_conn *conn); - int (*set_param) (iscsi_connh_t conn, enum iscsi_param param, + int (*set_param) (struct iscsi_cls_conn *conn, enum iscsi_param param, uint32_t value); - int (*get_conn_param) (void *conndata, enum iscsi_param param, + int (*get_conn_param) (struct iscsi_cls_conn *conn, + enum iscsi_param param, uint32_t *value); - int (*get_session_param) (struct Scsi_Host *shost, + int (*get_session_param) (struct iscsi_cls_session *session, enum iscsi_param param, uint32_t *value); - int (*send_pdu) (iscsi_connh_t conn, struct iscsi_hdr *hdr, + int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); - void (*get_stats) (iscsi_connh_t conn, struct iscsi_stats *stats); + void (*get_stats) (struct iscsi_cls_conn *conn, + struct iscsi_stats *stats); }; /* @@ -93,15 +96,14 @@ extern int iscsi_unregister_transport(struct iscsi_transport *tt); /* * control plane upcalls */ -extern void iscsi_conn_error(iscsi_connh_t conn, enum iscsi_err error); -extern int iscsi_recv_pdu(iscsi_connh_t conn, struct iscsi_hdr *hdr, +extern void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error); +extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; - iscsi_connh_t connh; int active; /* must be accessed with the connlock */ struct device dev; /* sysfs transport/container device */ struct mempool_zone *z_error; @@ -113,7 +115,7 @@ struct iscsi_cls_conn { container_of(_dev, struct iscsi_cls_conn, dev) struct iscsi_cls_session { - struct list_head list; /* item in session_list */ + struct list_head sess_list; /* item in session_list */ struct iscsi_transport *transport; struct device dev; /* sysfs transport/container device */ }; -- cgit v1.2.3 From 40ad7a6afc53217ad95b5ae2221e42d7655e057b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Feb 2006 23:30:11 -0800 Subject: [SPARC]: sys_newfstatat --> sys_fstatat64 Signed-off-by: David S. Miller --- arch/sparc/kernel/systbls.S | 2 +- arch/sparc64/kernel/sys_sparc32.c | 21 +++++++++++++++++++++ arch/sparc64/kernel/systbls.S | 4 ++-- include/asm-sparc/unistd.h | 2 +- include/asm-sparc64/unistd.h | 2 +- 5 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index c0314705d73a..768de64b371f 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -76,7 +76,7 @@ sys_call_table: /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat -/*285*/ .long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_newfstatat +/*285*/ .long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 /*290*/ .long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat /*295*/ .long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 9264ccbaaafa..417727bd87ba 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -428,6 +428,27 @@ asmlinkage long compat_sys_fstat64(unsigned int fd, return error; } +asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, + struct compat_stat64 __user * statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_compat_stat64(&stat, statbuf); + +out: + return error; +} + asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index a19168510be2..c3adb7ac167d 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -77,7 +77,7 @@ sys_call_table32: /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat - .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_newfstatat + .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 /*285*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare @@ -146,7 +146,7 @@ sys_call_table: /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_nis_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat - .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_newfstatat + .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 /*285*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index 0615d601a7c6..64ec640a40ee 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -305,7 +305,7 @@ #define __NR_mknodat 286 #define __NR_fchownat 287 #define __NR_futimesat 288 -#define __NR_newfstatat 289 +#define __NR_fstatat64 289 #define __NR_unlinkat 290 #define __NR_renameat 291 #define __NR_linkat 292 diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index c58ba8a096cf..a284986b1541 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -307,7 +307,7 @@ #define __NR_mknodat 286 #define __NR_fchownat 287 #define __NR_futimesat 288 -#define __NR_newfstatat 289 +#define __NR_fstatat64 289 #define __NR_unlinkat 290 #define __NR_renameat 291 #define __NR_linkat 292 -- cgit v1.2.3 From 56f3a40a5e7586043260669cc794e56fa58339e1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 13 Feb 2006 11:39:57 +0100 Subject: [Bluetooth] Reduce L2CAP MTU for RFCOMM connections This patch reduces the default L2CAP MTU for all RFCOMM connections from 1024 to 1013 to improve the interoperability with some broken RFCOMM implementations. To make this more flexible the L2CAP MTU becomes also a module parameter and so it can changed at runtime. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 2 +- net/bluetooth/rfcomm/core.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index bbfac86734ec..89d743cfdfdf 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -33,7 +33,7 @@ #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 -#define RFCOMM_MAX_L2CAP_MTU 1024 +#define RFCOMM_MAX_L2CAP_MTU 1013 #define RFCOMM_MAX_CREDITS 40 #define RFCOMM_SKB_HEAD_RESERVE 8 diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 0d89d6434136..5b4253c61f62 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -46,13 +46,15 @@ #include #include -#define VERSION "1.6" - #ifndef CONFIG_BT_RFCOMM_DEBUG #undef BT_DBG #define BT_DBG(D...) #endif +#define VERSION "1.7" + +static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; + static struct task_struct *rfcomm_thread; static DECLARE_MUTEX(rfcomm_sem); @@ -623,7 +625,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU; + l2cap_pi(sk)->imtu = l2cap_mtu; release_sock(sk); s = rfcomm_session_add(sock, BT_BOUND); @@ -1868,7 +1870,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU; + l2cap_pi(sk)->imtu = l2cap_mtu; release_sock(sk); /* Start listening on the socket */ @@ -2070,6 +2072,9 @@ static void __exit rfcomm_exit(void) module_init(rfcomm_init); module_exit(rfcomm_exit); +module_param(l2cap_mtu, uint, 0644); +MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); + MODULE_AUTHOR("Maxim Krasnyansky , Marcel Holtmann "); MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION); MODULE_VERSION(VERSION); -- cgit v1.2.3 From 7a11c4d0635d9f6995736390b8c3346fe6f63d57 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Feb 2006 15:34:11 -0800 Subject: [IRDA]: Ratelimit messages. From: Joe Perches Based upon a patch by Dave Jones. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- include/net/irda/irda.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h index 05a840837fe7..1880e46ecc9b 100644 --- a/include/net/irda/irda.h +++ b/include/net/irda/irda.h @@ -82,9 +82,9 @@ do { if(!(expr)) { \ #define IRDA_ASSERT_LABEL(label) #endif /* CONFIG_IRDA_DEBUG */ -#define IRDA_WARNING(args...) printk(KERN_WARNING args) -#define IRDA_MESSAGE(args...) printk(KERN_INFO args) -#define IRDA_ERROR(args...) printk(KERN_ERR args) +#define IRDA_WARNING(args...) do { if (net_ratelimit()) printk(KERN_WARNING args); } while (0) +#define IRDA_MESSAGE(args...) do { if (net_ratelimit()) printk(KERN_INFO args); } while (0) +#define IRDA_ERROR(args...) do { if (net_ratelimit()) printk(KERN_ERR args); } while (0) /* * Magic numbers used by Linux-IrDA. Random numbers which must be unique to -- cgit v1.2.3 From faead26d7a06605add627f29aee73ba654ce11f9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 14 Feb 2006 10:42:07 -0600 Subject: [PATCH] add scsi_execute_in_process_context() API We have several points in the SCSI stack (primarily for our device functions) where we need to guarantee process context, but (given the place where the last reference was released) we cannot guarantee this. This API gets around the issue by executing the function directly if the caller has process context, but scheduling a workqueue to execute in process context if the caller doesn't have it. Unfortunately, it requires memory allocation in interrupt context, but it's better than what we have previously. The true solution will require a bit of re-engineering, so isn't appropriate for 2.6.16. Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ include/scsi/scsi.h | 2 ++ 2 files changed, 61 insertions(+) (limited to 'include') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4a602853a98e..4362dcde74af 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -2248,3 +2249,61 @@ scsi_target_unblock(struct device *dev) device_for_each_child(dev, NULL, target_unblock); } EXPORT_SYMBOL_GPL(scsi_target_unblock); + + +struct work_queue_work { + struct work_struct work; + void (*fn)(void *); + void *data; +}; + +static void execute_in_process_context_work(void *data) +{ + void (*fn)(void *data); + struct work_queue_work *wqw = data; + + fn = wqw->fn; + data = wqw->data; + + kfree(wqw); + + fn(data); +} + +/** + * scsi_execute_in_process_context - reliably execute the routine with user context + * @fn: the function to execute + * @data: data to pass to the function + * + * Executes the function immediately if process context is available, + * otherwise schedules the function for delayed execution. + * + * Returns: 0 - function was executed + * 1 - function was scheduled for execution + * <0 - error + */ +int scsi_execute_in_process_context(void (*fn)(void *data), void *data) +{ + struct work_queue_work *wqw; + + if (!in_interrupt()) { + fn(data); + return 0; + } + + wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC); + + if (unlikely(!wqw)) { + printk(KERN_ERR "Failed to allocate memory\n"); + WARN_ON(1); + return -ENOMEM; + } + + INIT_WORK(&wqw->work, execute_in_process_context_work, wqw); + wqw->fn = fn; + wqw->data = data; + schedule_work(&wqw->work); + + return 1; +} +EXPORT_SYMBOL_GPL(scsi_execute_in_process_context); diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index c60b8ff2f5e4..9c331258bc27 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -433,4 +433,6 @@ struct scsi_lun { /* Used to obtain the PCI location of a device */ #define SCSI_IOCTL_GET_PCI 0x5387 +int scsi_execute_in_process_context(void (*fn)(void *data), void *data); + #endif /* _SCSI_SCSI_H */ -- cgit v1.2.3 From f32ec77b421ee15bf5a42082b60679e997c07993 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 28 Nov 2005 13:10:54 +0000 Subject: [MIPS] RM200: Give RM200 it's own timex.h. So we can get rid of config.h and the #ifdef crapola in the generic timex.h. Signed-off-by: Ralf Baechle --- include/asm-mips/mach-generic/timex.h | 11 +---------- include/asm-mips/mach-rm200/timex.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 10 deletions(-) create mode 100644 include/asm-mips/mach-rm200/timex.h (limited to 'include') diff --git a/include/asm-mips/mach-generic/timex.h b/include/asm-mips/mach-generic/timex.h index c6a2e5f0574a..48b4cfaa0d50 100644 --- a/include/asm-mips/mach-generic/timex.h +++ b/include/asm-mips/mach-generic/timex.h @@ -3,20 +3,11 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2003 by Ralf Baechle + * Copyright (C) 2003, 2005 by Ralf Baechle */ #ifndef __ASM_MACH_GENERIC_TIMEX_H #define __ASM_MACH_GENERIC_TIMEX_H -#include - -/* - * Last remaining user of the i8254 PIC, will be converted, too ... - */ -#ifdef CONFIG_SNI_RM200_PCI -#define CLOCK_TICK_RATE 1193182 -#else #define CLOCK_TICK_RATE 500000 -#endif #endif /* __ASM_MACH_GENERIC_TIMEX_H */ diff --git a/include/asm-mips/mach-rm200/timex.h b/include/asm-mips/mach-rm200/timex.h new file mode 100644 index 000000000000..11ff6cb0f214 --- /dev/null +++ b/include/asm-mips/mach-rm200/timex.h @@ -0,0 +1,13 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 2005 by Ralf Baechle + */ +#ifndef __ASM_MACH_RM200_TIMEX_H +#define __ASM_MACH_RM200_TIMEX_H + +#define CLOCK_TICK_RATE 1193182 + +#endif /* __ASM_MACH_RM200_TIMEX_H */ -- cgit v1.2.3 From 359bbd42a5a205234d5943571fc7bf946967ee59 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 9 Feb 2006 12:13:28 +0000 Subject: [MIPS] Fold non-__mips64 case into CONFIG_32BIT case. Signed-off-by: Ralf Baechle --- include/asm-mips/unistd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index e7ff9b187783..769305d20108 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -1184,10 +1184,8 @@ type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION -# ifndef __mips64 -# define __ARCH_WANT_STAT64 -# endif # ifdef CONFIG_32BIT +# define __ARCH_WANT_STAT64 # define __ARCH_WANT_SYS_TIME # endif # ifdef CONFIG_MIPS32_O32 -- cgit v1.2.3 From 41700e73995d6c814932cb55e12525bd34be1ca5 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 10 Feb 2006 00:39:06 +0900 Subject: [MIPS] Add protected_blast_icache_range, blast_icache_range, etc. Add blast_xxx_range(), protected_blast_xxx_range() etc. for common use. They are built by __BUILD_BLAST_CACHE_RANGE(). Use protected_cache_op() macro for various protected_ routines. Output code should be logically same. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 104 ++++++-------------------------------------- arch/mips/mm/c-tx39.c | 70 ++++------------------------- include/asm-mips/r4kcache.h | 74 +++++++++++++++++-------------- 3 files changed, 64 insertions(+), 184 deletions(-) (limited to 'include') diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index e51c38cef88e..1b71d91e8268 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -471,61 +471,29 @@ struct flush_icache_range_args { static inline void local_r4k_flush_icache_range(void *args) { struct flush_icache_range_args *fir_args = args; - unsigned long dc_lsize = cpu_dcache_line_size(); - unsigned long ic_lsize = cpu_icache_line_size(); - unsigned long sc_lsize = cpu_scache_line_size(); unsigned long start = fir_args->start; unsigned long end = fir_args->end; - unsigned long addr, aend; if (!cpu_has_ic_fills_f_dc) { if (end - start > dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_D */ - protected_writeback_dcache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } + protected_blast_dcache_range(start, end); } if (!cpu_icache_snoops_remote_store) { - if (end - start > scache_size) { + if (end - start > scache_size) r4k_blast_scache(); - } else { - addr = start & ~(sc_lsize - 1); - aend = (end - 1) & ~(sc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_SD */ - protected_writeback_scache_line(addr); - if (addr == aend) - break; - addr += sc_lsize; - } - } + else + protected_blast_scache_range(start, end); } } if (end - start > icache_size) r4k_blast_icache(); - else { - addr = start & ~(ic_lsize - 1); - aend = (end - 1) & ~(ic_lsize - 1); - while (1) { - /* Hit_Invalidate_I */ - protected_flush_icache_line(addr); - if (addr == aend) - break; - addr += ic_lsize; - } - } + else + protected_blast_icache_range(start, end); } static void r4k_flush_icache_range(unsigned long start, unsigned long end) @@ -619,27 +587,14 @@ static void r4k_flush_icache_page(struct vm_area_struct *vma, static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - /* Catch bad driver code */ BUG_ON(size == 0); if (cpu_has_subset_pcaches) { - unsigned long sc_lsize = cpu_scache_line_size(); - - if (size >= scache_size) { + if (size >= scache_size) r4k_blast_scache(); - return; - } - - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; - } + else + blast_scache_range(addr, addr + size); return; } @@ -651,17 +606,8 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) if (size >= dcache_size) { r4k_blast_dcache(); } else { - unsigned long dc_lsize = cpu_dcache_line_size(); - R4600_HIT_CACHEOP_WAR_IMPL; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) - break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } bc_wback_inv(addr, size); @@ -669,44 +615,22 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - /* Catch bad driver code */ BUG_ON(size == 0); if (cpu_has_subset_pcaches) { - unsigned long sc_lsize = cpu_scache_line_size(); - - if (size >= scache_size) { + if (size >= scache_size) r4k_blast_scache(); - return; - } - - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; - } + else + blast_scache_range(addr, addr + size); return; } if (size >= dcache_size) { r4k_blast_dcache(); } else { - unsigned long dc_lsize = cpu_dcache_line_size(); - R4600_HIT_CACHEOP_WAR_IMPL; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) - break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } bc_inv(addr, size); diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index 0a97a9434eba..7c572bea4a98 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -44,8 +44,6 @@ __asm__ __volatile__( \ /* TX39H-style cache flush routines. */ static void tx39h_flush_icache_all(void) { - unsigned long start = KSEG0; - unsigned long end = (start + icache_size); unsigned long flags, config; /* disable icache (set ICE#) */ @@ -53,33 +51,18 @@ static void tx39h_flush_icache_all(void) config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); - - /* invalidate icache */ - while (start < end) { - cache16_unroll32(start, Index_Invalidate_I); - start += 0x200; - } - + blast_icache16(); write_c0_conf(config); local_irq_restore(flags); } static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - /* Catch bad driver code */ BUG_ON(size == 0); iob(); - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - invalidate_dcache_line(a); /* Hit_Invalidate_D */ - if (a == end) break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } @@ -241,42 +224,21 @@ static void tx39_flush_data_cache_page(unsigned long addr) static void tx39_flush_icache_range(unsigned long start, unsigned long end) { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - unsigned long addr, aend; - if (end - start > dcache_size) tx39_blast_dcache(); - else { - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_D */ - protected_writeback_dcache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } - } + else + protected_blast_dcache_range(start, end); if (end - start > icache_size) tx39_blast_icache(); else { unsigned long flags, config; - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); /* disable icache (set ICE#) */ local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); - while (1) { - /* Hit_Invalidate_I */ - protected_flush_icache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } + protected_blast_icache_range(start, end); write_c0_conf(config); local_irq_restore(flags); } @@ -311,7 +273,7 @@ static void tx39_flush_icache_page(struct vm_area_struct *vma, struct page *page static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; + unsigned long end; if (((size | addr) & (PAGE_SIZE - 1)) == 0) { end = addr + size; @@ -322,20 +284,13 @@ static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) } else if (size > dcache_size) { tx39_blast_dcache(); } else { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } } static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; + unsigned long end; if (((size | addr) & (PAGE_SIZE - 1)) == 0) { end = addr + size; @@ -346,14 +301,7 @@ static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) } else if (size > dcache_size) { tx39_blast_dcache(); } else { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - invalidate_dcache_line(a); /* Hit_Invalidate_D */ - if (a == end) break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } } diff --git a/include/asm-mips/r4kcache.h b/include/asm-mips/r4kcache.h index cc53196efa40..9632c27dad15 100644 --- a/include/asm-mips/r4kcache.h +++ b/include/asm-mips/r4kcache.h @@ -14,6 +14,7 @@ #include #include +#include /* * This macro return a properly sign-extended address suitable as base address @@ -78,22 +79,25 @@ static inline void flush_scache_line(unsigned long addr) cache_op(Hit_Writeback_Inv_SD, addr); } +#define protected_cache_op(op,addr) \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set noreorder \n" \ + " .set mips3 \n" \ + "1: cache %0, (%1) \n" \ + "2: .set pop \n" \ + " .section __ex_table,\"a\" \n" \ + " "STR(PTR)" 1b, 2b \n" \ + " .previous" \ + : \ + : "i" (op), "r" (addr)) + /* * The next two are for badland addresses like signal trampolines. */ static inline void protected_flush_icache_line(unsigned long addr) { - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " .set mips3 \n" - "1: cache %0, (%1) \n" - "2: .set pop \n" - " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 2b \n" - " .previous" - : - : "i" (Hit_Invalidate_I), "r" (addr)); + protected_cache_op(Hit_Invalidate_I, addr); } /* @@ -104,32 +108,12 @@ static inline void protected_flush_icache_line(unsigned long addr) */ static inline void protected_writeback_dcache_line(unsigned long addr) { - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " .set mips3 \n" - "1: cache %0, (%1) \n" - "2: .set pop \n" - " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 2b \n" - " .previous" - : - : "i" (Hit_Writeback_Inv_D), "r" (addr)); + protected_cache_op(Hit_Writeback_Inv_D, addr); } static inline void protected_writeback_scache_line(unsigned long addr) { - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " .set mips3 \n" - "1: cache %0, (%1) \n" - "2: .set pop \n" - " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 2b \n" - " .previous" - : - : "i" (Hit_Writeback_Inv_SD), "r" (addr)); + protected_cache_op(Hit_Writeback_Inv_SD, addr); } /* @@ -295,4 +279,28 @@ __BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64) __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) +/* build blast_xxx_range, protected_blast_xxx_range */ +#define __BUILD_BLAST_CACHE_RANGE(pfx, desc, hitop, prot) \ +static inline void prot##blast_##pfx##cache##_range(unsigned long start, \ + unsigned long end) \ +{ \ + unsigned long lsize = cpu_##desc##_line_size(); \ + unsigned long addr = start & ~(lsize - 1); \ + unsigned long aend = (end - 1) & ~(lsize - 1); \ + while (1) { \ + prot##cache_op(hitop, addr); \ + if (addr == aend) \ + break; \ + addr += lsize; \ + } \ +} + +__BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, protected_) +__BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, protected_) +__BUILD_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I, protected_) +__BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, ) +__BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, ) +/* blast_inv_dcache_range */ +__BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, ) + #endif /* _ASM_R4KCACHE_H */ -- cgit v1.2.3 From 3218357c94af92478ef39163163a81e654385320 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 10 Feb 2006 01:31:24 +0000 Subject: [MIPS] More uaccess.h fixes with gcc >= 4.0.1. From Richard Sandiford : This patch caused a miscompilation of the restore_gp_regs() block in restore_sigcontext(). This was in a 32-bit kernel compiled with GCC CVS head. restore_gp_regs() copies 64-bit user fields into 32-bit variables, and in this combination, the new __get_user_asm_ll32() clobbers too many registers. It says: /* * Get a long long 64 using 32 bit registers. */ { \ __asm__ __volatile__( \ "1: lw %1, (%3) \n" \ "2: lw %D1, 4(%3) \n" \ " move %0, $0 \n" \ "3: .section .fixup,\"ax\" \n" \ "4: li %0, %4 \n" \ " move %1, $0 \n" \ " move %D1, $0 \n" \ " j 3b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ " " __UA_ADDR " 1b, 4b \n" \ " " __UA_ADDR " 2b, 4b \n" \ " .previous \n" \ : "=r" (__gu_err), "=&r" (val) \ : "0" (0), "r" (addr), "i" (-EFAULT)); \ } and this requires val (%1) to be a 64-bit value. In the case I saw, gcc was using $3 for the 32-bit val, and wasn't expecting $4 to be clobbered. Signed-off-by: Ralf Baechle --- include/asm-mips/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mips/uaccess.h b/include/asm-mips/uaccess.h index 91d813a37823..7a553e9d44d3 100644 --- a/include/asm-mips/uaccess.h +++ b/include/asm-mips/uaccess.h @@ -266,6 +266,8 @@ do { \ */ #define __get_user_asm_ll32(val, addr) \ { \ + unsigned long long __gu_tmp; \ + \ __asm__ __volatile__( \ "1: lw %1, (%3) \n" \ "2: lw %D1, 4(%3) \n" \ @@ -280,8 +282,9 @@ do { \ " " __UA_ADDR " 1b, 4b \n" \ " " __UA_ADDR " 2b, 4b \n" \ " .previous \n" \ - : "=r" (__gu_err), "=&r" (val) \ + : "=r" (__gu_err), "=&r" (__gu_tmp) \ : "0" (0), "r" (addr), "i" (-EFAULT)); \ + (val) = __gu_tmp; \ } /* -- cgit v1.2.3 From fbb6b3a4ac0ccf12a97c98881d9d873d6dc26fe5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 10 Feb 2006 14:13:08 +0000 Subject: [MIPS] Get rid of kludgery needed to keep stdargs of old compilers working. Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 1 - include/asm-mips/gcc/sgidefs.h | 17 ----------------- 2 files changed, 18 deletions(-) delete mode 100644 include/asm-mips/gcc/sgidefs.h (limited to 'include') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 6a57407df1bc..38c0f3360d51 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -94,7 +94,6 @@ endif # machines may also. Since BFD is incredibly buggy with respect to # crossformat linking we rely on the elf2ecoff tool for format conversion. # -cflags-y += -I $(TOPDIR)/include/asm/gcc cflags-y += -G 0 -mno-abicalls -fno-pic -pipe LDFLAGS_vmlinux += -G 0 -static -n -nostdlib MODFLAGS += -mlong-calls diff --git a/include/asm-mips/gcc/sgidefs.h b/include/asm-mips/gcc/sgidefs.h deleted file mode 100644 index 05994371a2af..000000000000 --- a/include/asm-mips/gcc/sgidefs.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * include/sgidefs.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1996 by Ralf Baechle - * - * This file is here to satisfy GCC's expectations. - */ -#ifndef __SGIDEFS_H -#define __SGIDEFS_H - -#include - -#endif /* __SGIDEFS_H */ -- cgit v1.2.3 From 9cf8ff96447f995d5ea18ec9f25dc8dae26501a2 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 13 Feb 2006 09:15:49 +0000 Subject: [MIPS] Fix CPU type bitmasks for MIPS III, IV and V. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle --- include/asm-mips/cpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-mips/cpu.h b/include/asm-mips/cpu.h index 934e063e79f1..818b9a97e214 100644 --- a/include/asm-mips/cpu.h +++ b/include/asm-mips/cpu.h @@ -204,9 +204,9 @@ */ #define MIPS_CPU_ISA_I 0x00000001 #define MIPS_CPU_ISA_II 0x00000002 -#define MIPS_CPU_ISA_III 0x00000003 -#define MIPS_CPU_ISA_IV 0x00000004 -#define MIPS_CPU_ISA_V 0x00000005 +#define MIPS_CPU_ISA_III 0x00000004 +#define MIPS_CPU_ISA_IV 0x00000008 +#define MIPS_CPU_ISA_V 0x00000010 #define MIPS_CPU_ISA_M32R1 0x00000020 #define MIPS_CPU_ISA_M32R2 0x00000040 #define MIPS_CPU_ISA_M64R1 0x00000080 -- cgit v1.2.3 From a6b14fa6fdc01ab3519c2729624f808677539b59 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 14 Feb 2006 15:01:12 -0800 Subject: [IA64] Count disabled cpus as potential hot-pluggable CPUs Have a facility to account for potentially hot-pluggable CPUs. ACPI doesnt give a determinstic method to find hot-pluggable CPUs. Hence we use 2 methods to assist. - BIOS can mark potentially hot-pluggable CPUs as disabled in the MADT tables. - User can specify the number of hot-pluggable CPUs via parameter additional_cpus=X The option is enabled only if ACPI_CONFIG_HOTPLUG_CPU=y which enables the physical hotplug option. Without which user can still use logical onlining and offlining of CPUs by enabling CONFIG_HOTPLUG_CPU=y Adds more bits to cpu_possible_map for potentially hot-pluggable cpus. Signed-off-by: Ashok Raj Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/setup.c | 4 ++++ include/asm-ia64/acpi.h | 2 ++ 3 files changed, 62 insertions(+) (limited to 'include') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index d2702c419cf8..34795ede72e0 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -761,6 +761,62 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) return (0); } +int additional_cpus __initdata = -1; + +static __init int setup_additional_cpus(char *s) +{ + if (s) + additional_cpus = simple_strtol(s, NULL, 0); + + return 0; +} + +early_param("additional_cpus", setup_additional_cpus); + +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with additional_cpus=NUM + * - Otherwise don't reserve additional CPUs. + */ +__init void prefill_possible_map(void) +{ + int i; + int possible, disabled_cpus; + + disabled_cpus = total_cpus - available_cpus; + if (additional_cpus == -1) { + if (disabled_cpus > 0) { + possible = total_cpus; + additional_cpus = disabled_cpus; + } + else { + possible = available_cpus; + additional_cpus = 0; + } + } else { + possible = available_cpus + additional_cpus; + } + if (possible > NR_CPUS) + possible = NR_CPUS; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, + max_t(int, additional_cpus, 0)); + + for (i = 0; i < possible; i++) + cpu_set(i, cpu_possible_map); +} + int acpi_map_lsapic(acpi_handle handle, int *pcpu) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 35f7835294a3..3258e09278d0 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -430,6 +430,7 @@ setup_arch (char **cmdline_p) if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); + parse_early_param(); #ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -688,6 +689,9 @@ void setup_per_cpu_areas (void) { /* start_kernel() requires this... */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif } /* diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index 3a544ffc5008..f7a517654308 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h @@ -106,6 +106,8 @@ extern unsigned int can_cpei_retarget(void); extern unsigned int is_cpu_cpei_target(unsigned int cpu); extern void set_cpei_target_cpu(unsigned int cpu); extern unsigned int get_cpei_target_cpu(void); +extern void prefill_possible_map(void); +extern int additional_cpus; #ifdef CONFIG_ACPI_NUMA /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/ -- cgit v1.2.3 From 7c8903f6373f9abecf060bad53ca36bc4ac037f2 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 14 Feb 2006 13:53:03 -0800 Subject: [PATCH] jbd: revert checkpoint list changes This patch reverts commit f93ea411b73594f7d144855fd34278bcf34a9afc: [PATCH] jbd: split checkpoint lists This broke journal_flush() for OCFS2, which is its method of being sure that metadata is sent to disk for another node. And two related commits 8d3c7fce2d20ecc3264c8d8c91ae3beacdeaed1b and 43c3e6f5abdf6acac9b90c86bf03f995bf7d3d92 with the subjects: [PATCH] jbd: log_do_checkpoint fix [PATCH] jbd: remove_transaction fix These seem to be incremental bugfixes on the original patch and as such are no longer needed. Signed-off-by: Mark Fasheh Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/checkpoint.c | 418 ++++++++++++++++++++++------------------------------ fs/jbd/commit.c | 3 +- include/linux/jbd.h | 8 +- 3 files changed, 179 insertions(+), 250 deletions(-) (limited to 'include') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index e6265a0b56b8..543ed543d1e5 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -24,75 +24,29 @@ #include /* - * Unlink a buffer from a transaction checkpoint list. + * Unlink a buffer from a transaction. * * Called with j_list_lock held. */ -static void __buffer_unlink_first(struct journal_head *jh) +static inline void __buffer_unlink(struct journal_head *jh) { transaction_t *transaction; transaction = jh->b_cp_transaction; + jh->b_cp_transaction = NULL; jh->b_cpnext->b_cpprev = jh->b_cpprev; jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) { + if (transaction->t_checkpoint_list == jh) transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; - } -} - -/* - * Unlink a buffer from a transaction checkpoint(io) list. - * - * Called with j_list_lock held. - */ - -static inline void __buffer_unlink(struct journal_head *jh) -{ - transaction_t *transaction; - - transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - if (transaction->t_checkpoint_io_list == jh) { - transaction->t_checkpoint_io_list = jh->b_cpnext; - if (transaction->t_checkpoint_io_list == jh) - transaction->t_checkpoint_io_list = NULL; - } -} - -/* - * Move a buffer from the checkpoint list to the checkpoint io list - * - * Called with j_list_lock held - */ - -static inline void __buffer_relink_io(struct journal_head *jh) -{ - transaction_t *transaction; - - transaction = jh->b_cp_transaction; - __buffer_unlink_first(jh); - - if (!transaction->t_checkpoint_io_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_io_list; - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_io_list = jh; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; } /* * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it and 2 if we also released the - * whole transaction. - * + * Returns 1 if we released it. * Requires j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ @@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - ret = __journal_remove_checkpoint(jh) + 1; + __journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); + ret = 1; } else { jbd_unlock_bh_state(bh); } @@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) } /* - * Clean up transaction's list of buffers submitted for io. - * We wait for any pending IO to complete and remove any clean - * buffers. Note that we take the buffers in the opposite ordering - * from the one in which they were submitted for IO. + * Clean up a transaction's checkpoint list. + * + * We wait for any pending IO to complete and make sure any clean + * buffers are removed from the transaction. + * + * Return 1 if we performed any actions which might have destroyed the + * checkpoint. (journal_remove_checkpoint() deletes the transaction when + * the last checkpoint buffer is cleansed) * * Called with j_list_lock held. */ - -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) { - struct journal_head *jh; + struct journal_head *jh, *next_jh, *last_jh; struct buffer_head *bh; - tid_t this_tid; - int released = 0; - - this_tid = transaction->t_tid; -restart: - /* Didn't somebody clean up the transaction in the meanwhile */ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - return; - while (!released && transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; + int ret = 0; + + assert_spin_locked(&journal->j_list_lock); + jh = transaction->t_checkpoint_list; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + next_jh = jh; + do { + jh = next_jh; bh = jh2bh(jh); - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); - goto restart; - } if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart; + goto out_return_1; } + /* - * Now in whatever state the buffer currently is, we know that - * it has been written out and so we can drop it from the list + * This is foul */ - released = __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - } + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + goto out_return_1; + } + + if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + goto out_return_1; + } + + /* + * AKPM: I think the buffer_jbddirty test is redundant - it + * shouldn't have NULL b_transaction? + */ + next_jh = jh->b_cpnext; + if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + __brelse(bh); + ret = 1; + } else { + jbd_unlock_bh_state(bh); + } + } while (jh != last_jh); + + return ret; +out_return_1: + spin_lock(&journal->j_list_lock); + return 1; } #define NR_BATCH 64 @@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; + spin_unlock(&journal->j_list_lock); ll_rw_block(SWRITE, *batch_count, bhs); + spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); @@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. * - * Called with j_list_lock held and drops it if 1 is returned + * Called with j_list_lock held. * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ -static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count) +static int __flush_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count, + int *drop_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - put_bh(bh); - ret = 1; - } - else if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; + if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { + J_ASSERT_JH(jh, jh->b_transaction == NULL); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - ret = 1; - } - else if (!buffer_dirty(bh)) { - J_ASSERT_JH(jh, !buffer_jbddirty(bh)); - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - put_bh(bh); - ret = 1; - } - else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. @@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); bhs[*batch_count] = bh; - __buffer_relink_io(jh); jbd_unlock_bh_state(bh); (*batch_count)++; if (*batch_count == NR_BATCH) { - spin_unlock(&journal->j_list_lock); __flush_batch(journal, bhs, batch_count); ret = 1; } + } else { + int last_buffer = 0; + if (jh->b_cpnext == jh) { + /* We may be about to drop the transaction. Tell the + * caller that the lists have changed. + */ + last_buffer = 1; + } + if (__try_to_free_cp_buf(jh)) { + (*drop_count)++; + ret = last_buffer; + } } return ret; } /* - * Perform an actual checkpoint. We take the first transaction on the - * list of transactions to be checkpointed and send all its buffers - * to disk. We submit larger chunks of data at once. + * Perform an actual checkpoint. We don't write out only enough to + * satisfy the current blocked requests: rather we submit a reasonably + * sized chunk of the outstanding data to disk at once for + * efficiency. __log_wait_for_space() will retry if we didn't free enough. * + * However, we _do_ take into account the amount requested so that once + * the IO has been queued, we can return as soon as enough of it has + * completed to disk. + * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) { - transaction_t *transaction; - tid_t this_tid; int result; + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; jbd_debug(1, "Start checkpoint\n"); @@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal) return result; /* - * OK, we need to start writing disk blocks. Take one transaction - * and write it. + * OK, we need to start writing disk blocks. Try to free up a + * quarter of the log in a single checkpoint if we can. */ - spin_lock(&journal->j_list_lock); - if (!journal->j_checkpoint_transactions) - goto out; - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; -restart: /* - * If someone cleaned up this transaction while we slept, we're - * done (maybe it's a new transaction, but it fell at the same - * address). + * AKPM: check this code. I had a feeling a while back that it + * degenerates into a busy loop at unmount time. */ - if (journal->j_checkpoint_transactions == transaction && - transaction->t_tid == this_tid) { - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; - struct journal_head *jh; - int retry = 0; - - while (!retry && transaction->t_checkpoint_list) { + spin_lock(&journal->j_list_lock); + while (journal->j_checkpoint_transactions) { + transaction_t *transaction; + struct journal_head *jh, *last_jh, *next_jh; + int drop_count = 0; + int cleanup_ret, retry = 0; + tid_t this_tid; + + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; + jh = transaction->t_checkpoint_list; + last_jh = jh->b_cpprev; + next_jh = jh; + do { struct buffer_head *bh; - jh = transaction->t_checkpoint_list; + jh = next_jh; + next_jh = jh->b_cpnext; bh = jh2bh(jh); if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); retry = 1; break; } - retry = __process_buffer(journal, jh, bhs, - &batch_count); - if (!retry && - lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); + retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); + if (cond_resched_lock(&journal->j_list_lock)) { retry = 1; break; } - } + } while (jh != last_jh && !retry); if (batch_count) { - if (!retry) { - spin_unlock(&journal->j_list_lock); - retry = 1; - } __flush_batch(journal, bhs, &batch_count); + retry = 1; } - if (retry) { - spin_lock(&journal->j_list_lock); - goto restart; - } /* - * Now we have cleaned up the first transaction's checkpoint - * list. Let's clean up the second one. + * If someone cleaned up this transaction while we slept, we're + * done + */ + if (journal->j_checkpoint_transactions != transaction) + break; + if (retry) + continue; + /* + * Maybe it's a new transaction, but it fell at the same + * address */ - __wait_cp_io(journal, transaction); + if (transaction->t_tid != this_tid) + continue; + /* + * We have walked the whole transaction list without + * finding anything to write to disk. We had better be + * able to make some progress or we are in trouble. + */ + cleanup_ret = __cleanup_transaction(journal, transaction); + J_ASSERT(drop_count != 0 || cleanup_ret != 0); + if (journal->j_checkpoint_transactions != transaction) + break; } -out: spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) return result; + return 0; } @@ -471,53 +455,6 @@ int cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ -/* - * journal_clean_one_cp_list - * - * Find all the written-back checkpoint buffers in the given list and release them. - * - * Called with the journal locked. - * Called with j_list_lock held. - * Returns number of bufers reaped (for debug) - */ - -static int journal_clean_one_cp_list(struct journal_head *jh, int *released) -{ - struct journal_head *last_jh; - struct journal_head *next_jh = jh; - int ret, freed = 0; - - *released = 0; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranking */ - if (jbd_trylock_bh_state(jh2bh(jh))) { - ret = __try_to_free_cp_buf(jh); - if (ret) { - freed++; - if (ret == 2) { - *released = 1; - return freed; - } - } - } - /* - * This function only frees up some memory if possible so we - * dont have an obligation to finish processing. Bail out if - * preemption requested: - */ - if (need_resched()) - return freed; - } while (jh != last_jh); - - return freed; -} - /* * journal_clean_checkpoint_list * @@ -525,38 +462,46 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) * * Called with the journal locked. * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) + * Returns number of bufers reaped (for debug) */ int __journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0, released; + int ret = 0; transaction = journal->j_checkpoint_transactions; - if (!transaction) + if (transaction == 0) goto out; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { + struct journal_head *jh; + transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_list, &released); - if (need_resched()) - goto out; - if (released) - continue; - /* - * It is essential that we are as careful as in the case of - * t_checkpoint_list with removing the buffer from the list as - * we can possibly see not yet submitted buffers on io_list - */ - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, &released); - if (need_resched()) - goto out; + jh = transaction->t_checkpoint_list; + if (jh) { + struct journal_head *last_jh = jh->b_cpprev; + struct journal_head *next_jh = jh; + + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranknig */ + if (jbd_trylock_bh_state(jh2bh(jh))) + ret += __try_to_free_cp_buf(jh); + /* + * This function only frees up some memory + * if possible so we dont have an obligation + * to finish processing. Bail out if preemption + * requested: + */ + if (need_resched()) + goto out; + } while (jh != last_jh); + } } while (transaction != last_transaction); out: return ret; @@ -571,22 +516,18 @@ out: * buffer updates committed in that transaction have safely been stored * elsewhere on disk. To achieve this, all of the buffers in a * transaction need to be maintained on the transaction's checkpoint - * lists until they have been rewritten, at which point this function is + * list until they have been rewritten, at which point this function is * called to remove the buffer from the existing transaction's - * checkpoint lists. - * - * The function returns 1 if it frees the transaction, 0 otherwise. + * checkpoint list. * * This function is called with the journal locked. * This function is called with j_list_lock held. - * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -int __journal_remove_checkpoint(struct journal_head *jh) +void __journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; - int ret = 0; JBUFFER_TRACE(jh, "entry"); @@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) journal = transaction->t_journal; __buffer_unlink(jh); - jh->b_cp_transaction = NULL; - if (transaction->t_checkpoint_list != NULL || - transaction->t_checkpoint_io_list != NULL) + if (transaction->t_checkpoint_list != NULL) goto out; JBUFFER_TRACE(jh, "transaction has no more buffers"); @@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) /* Just in case anybody was waiting for more transactions to be checkpointed... */ wake_up(&journal->j_wait_logspace); - ret = 1; out: JBUFFER_TRACE(jh, "exit"); - return ret; } /* @@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); - J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_updates == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 29e62d98bae6..002ad2bbc769 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -829,8 +829,7 @@ restart_loop: journal->j_committing_transaction = NULL; spin_unlock(&journal->j_state_lock); - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { + if (commit_transaction->t_checkpoint_list == NULL) { __journal_drop_transaction(journal, commit_transaction); } else { if (journal->j_checkpoint_transactions == NULL) { diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 0fe4aa891ddc..41ee79962bb2 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -497,12 +497,6 @@ struct transaction_s */ struct journal_head *t_checkpoint_list; - /* - * Doubly-linked circular list of all buffers submitted for IO while - * checkpointing. [j_list_lock] - */ - struct journal_head *t_checkpoint_io_list; - /* * Doubly-linked circular list of temporary buffers currently undergoing * IO in the log [j_list_lock] @@ -852,7 +846,7 @@ extern void journal_commit_transaction(journal_t *); /* Checkpoint list management */ int __journal_clean_checkpoint_list(journal_t *journal); -int __journal_remove_checkpoint(struct journal_head *); +void __journal_remove_checkpoint(struct journal_head *); void __journal_insert_checkpoint(struct journal_head *, transaction_t *); /* Buffer IO */ -- cgit v1.2.3 From 5ac5f9d1ce8492163dbde5d357dc5d03becf7e36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Feb 2006 13:53:04 -0800 Subject: [PATCH] NLM: Fix the NLM_GRANTED callback checks If 2 threads attached to the same process are blocking on different locks on different files (maybe even on different servers) but have the same lock arguments (i.e. same offset+length - actually quite common, since most processes try to lock the entire file) then the first GRANTED call that wakes one up will also wake the other. Currently when the NLM_GRANTED callback comes in, lockd walks the list of blocked locks in search of a match to the lock that the NLM server has granted. Although it checks the lock pid, start and end, it fails to check the filehandle and the server address. By checking the filehandle and server IP address, we ensure that this only happens if the locks truly are referencing the same file. Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/lockd/clntlock.c | 27 +++++++++++++++++---------- fs/lockd/svc4proc.c | 2 +- fs/lockd/svcproc.c | 2 +- include/linux/lockd/lockd.h | 6 +++--- 4 files changed, 22 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 3eaf6e701087..da6354baa0b8 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout) /* * The server lockd has called us back to tell us the lock was granted */ -u32 -nlmclnt_grant(struct nlm_lock *lock) +u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) { + const struct file_lock *fl = &lock->fl; + const struct nfs_fh *fh = &lock->fh; struct nlm_wait *block; u32 res = nlm_lck_denied; @@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock) * Warning: must not use cookie to match it! */ list_for_each_entry(block, &nlm_blocked, b_list) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) { - /* Alright, we found a lock. Set the return status - * and wake up the caller - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - res = nlm_granted; - } + struct file_lock *fl_blocked = block->b_lock; + + if (!nlm_compare_locks(fl_blocked, fl)) + continue; + if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) + continue; + if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0) + continue; + /* Alright, we found a lock. Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; } return res; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4063095d849e..b10f913aa06a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(&argp->lock); + resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3bc437e0cf5b..35681d9cf1fc 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(&argp->lock); + resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 920766cea79c..ef21ed296039 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -149,7 +149,7 @@ struct nlm_rqst * nlmclnt_alloc_call(void); int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_rqst *req); long nlmclnt_block(struct nlm_rqst *req, long timeout); -u32 nlmclnt_grant(struct nlm_lock *); +u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *); @@ -204,7 +204,7 @@ nlmsvc_file_inode(struct nlm_file *file) * Compare two host addresses (needs modifying for ipv6) */ static __inline__ int -nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2) +nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2) { return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } @@ -214,7 +214,7 @@ nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2) * When the second lock is of type F_UNLCK, this acts like a wildcard. */ static __inline__ int -nlm_compare_locks(struct file_lock *fl1, struct file_lock *fl2) +nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { return fl1->fl_pid == fl2->fl_pid && fl1->fl_start == fl2->fl_start -- cgit v1.2.3 From 8b09fb34513225d87d511c7e8f29c0fd3cf860e0 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 14 Feb 2006 13:53:05 -0800 Subject: [PATCH] fix x86 topology export in sysfs for subarchitectures The correct way to export hyperthreading based functions is to predicate them on CONFIG_X86_HT. Without this, the topology exporting patch breaks the build on all non-PC x86 subarchitectures. Signed-off-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index af503a122b23..aa958c6ee83e 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -27,7 +27,7 @@ #ifndef _ASM_I386_TOPOLOGY_H #define _ASM_I386_TOPOLOGY_H -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT #define topology_physical_package_id(cpu) \ (phys_proc_id[cpu] == BAD_APICID ? -1 : phys_proc_id[cpu]) #define topology_core_id(cpu) \ -- cgit v1.2.3 From f822566165dd46ff5de9bf895cfa6c51f53bb0c4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 14 Feb 2006 13:53:08 -0800 Subject: [PATCH] madvise MADV_DONTFORK/MADV_DOFORK Currently, copy-on-write may change the physical address of a page even if the user requested that the page is pinned in memory (either by mlock or by get_user_pages). This happens if the process forks meanwhile, and the parent writes to that page. As a result, the page is orphaned: in case of get_user_pages, the application will never see any data hardware DMA's into this page after the COW. In case of mlock'd memory, the parent is not getting the realtime/security benefits of mlock. In particular, this affects the Infiniband modules which do DMA from and into user pages all the time. This patch adds madvise options to control whether memory range is inherited across fork. Useful e.g. for when hardware is doing DMA from/into these pages. Could also be useful to an application wanting to speed up its forks by cutting large areas out of consideration. Signed-off-by: Michael S. Tsirkin Acked-by: Hugh Dickins Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mman.h | 2 ++ include/asm-arm/mman.h | 2 ++ include/asm-arm26/mman.h | 2 ++ include/asm-cris/mman.h | 2 ++ include/asm-frv/mman.h | 2 ++ include/asm-h8300/mman.h | 2 ++ include/asm-i386/mman.h | 2 ++ include/asm-ia64/mman.h | 2 ++ include/asm-m32r/mman.h | 2 ++ include/asm-m68k/mman.h | 2 ++ include/asm-mips/mman.h | 2 ++ include/asm-parisc/mman.h | 2 ++ include/asm-powerpc/mman.h | 2 ++ include/asm-s390/mman.h | 2 ++ include/asm-sh/mman.h | 2 ++ include/asm-sparc/mman.h | 2 ++ include/asm-sparc64/mman.h | 2 ++ include/asm-v850/mman.h | 2 ++ include/asm-x86_64/mman.h | 2 ++ include/asm-xtensa/mman.h | 2 ++ mm/madvise.c | 21 +++++++++++++++++---- 21 files changed, 57 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h index f6439532a262..a21515c16a43 100644 --- a/include/asm-alpha/mman.h +++ b/include/asm-alpha/mman.h @@ -43,6 +43,8 @@ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ #define MADV_REMOVE 7 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h index f0bebca2ac21..693ed859e632 100644 --- a/include/asm-arm/mman.h +++ b/include/asm-arm/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h index 0ed7780541fa..2096c50df888 100644 --- a/include/asm-arm26/mman.h +++ b/include/asm-arm26/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h index 5a382b8bf3f7..deddfb239ff5 100644 --- a/include/asm-cris/mman.h +++ b/include/asm-cris/mman.h @@ -38,6 +38,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h index 8af4a41c255e..d3bca306da82 100644 --- a/include/asm-frv/mman.h +++ b/include/asm-frv/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h index 744a8fb485c2..ac0346f7d11d 100644 --- a/include/asm-h8300/mman.h +++ b/include/asm-h8300/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h index ba4941e6f643..ab2339a1d807 100644 --- a/include/asm-i386/mman.h +++ b/include/asm-i386/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h index 828beb24a20e..357ebb780cc0 100644 --- a/include/asm-ia64/mman.h +++ b/include/asm-ia64/mman.h @@ -44,6 +44,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h index 12e29747bc84..6b02fe3fcff2 100644 --- a/include/asm-m32r/mman.h +++ b/include/asm-m32r/mman.h @@ -38,6 +38,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h index ea262ab88b3b..efd12bc4ccb7 100644 --- a/include/asm-m68k/mman.h +++ b/include/asm-m68k/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h index dd17c8bd62a1..6d01e26830fa 100644 --- a/include/asm-mips/mman.h +++ b/include/asm-mips/mman.h @@ -66,6 +66,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h index 736b0abcac05..a381cf5c8f55 100644 --- a/include/asm-parisc/mman.h +++ b/include/asm-parisc/mman.h @@ -49,6 +49,8 @@ #define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ #define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ #define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h index a2e34c21b44f..fcff25d13f13 100644 --- a/include/asm-powerpc/mman.h +++ b/include/asm-powerpc/mman.h @@ -45,6 +45,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h index c8d5409b5d56..d41ca1477010 100644 --- a/include/asm-s390/mman.h +++ b/include/asm-s390/mman.h @@ -44,6 +44,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h index 693bd55a3710..0e08d0573abc 100644 --- a/include/asm-sh/mman.h +++ b/include/asm-sh/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h index 98435ad8619e..4a298b2be859 100644 --- a/include/asm-sparc/mman.h +++ b/include/asm-sparc/mman.h @@ -55,6 +55,8 @@ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ #define MADV_REMOVE 0x6 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h index cb4b6156194d..d705ec92da8b 100644 --- a/include/asm-sparc64/mman.h +++ b/include/asm-sparc64/mman.h @@ -55,6 +55,8 @@ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ #define MADV_REMOVE 0x6 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h index edc79965193a..7b851c310e41 100644 --- a/include/asm-v850/mman.h +++ b/include/asm-v850/mman.h @@ -33,6 +33,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h index d0e97b74f735..b699a38c1c3c 100644 --- a/include/asm-x86_64/mman.h +++ b/include/asm-x86_64/mman.h @@ -37,6 +37,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h index 082a7504925e..e2d7afb679c8 100644 --- a/include/asm-xtensa/mman.h +++ b/include/asm-xtensa/mman.h @@ -73,6 +73,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/mm/madvise.c b/mm/madvise.c index ae0ae3ea299a..af3d573b0141 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -22,16 +22,23 @@ static long madvise_behavior(struct vm_area_struct * vma, struct mm_struct * mm = vma->vm_mm; int error = 0; pgoff_t pgoff; - int new_flags = vma->vm_flags & ~VM_READHINTMASK; + int new_flags = vma->vm_flags; switch (behavior) { + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; + break; case MADV_SEQUENTIAL: - new_flags |= VM_SEQ_READ; + new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; break; case MADV_RANDOM: - new_flags |= VM_RAND_READ; + new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; break; - default: + case MADV_DONTFORK: + new_flags |= VM_DONTCOPY; + break; + case MADV_DOFORK: + new_flags &= ~VM_DONTCOPY; break; } @@ -177,6 +184,12 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, long error; switch (behavior) { + case MADV_DOFORK: + if (vma->vm_flags & VM_IO) { + error = -EINVAL; + break; + } + case MADV_DONTFORK: case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: -- cgit v1.2.3 From d6077cb80cde4506720f9165eba99ee07438513f Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Tue, 14 Feb 2006 13:53:10 -0800 Subject: [PATCH] sched: revert "filter affine wakeups" Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6: [PATCH] sched: filter affine wakeups Apparently caused more than 10% performance regression for aim7 benchmark. The setup in use is 16-cpu HP rx8620, 64Gb of memory and 12 MSA1000s with 144 disks. Each disk is 72Gb with a single ext3 filesystem (courtesy of HP, who supplied benchmark results). The problem is, for aim7, the wake-up pattern is random, but it still needs load balancing action in the wake-up path to achieve best performance. With the above commit, lack of load balancing hurts that workload. However, for workloads like database transaction processing, the requirement is exactly opposite. In the wake up path, best performance is achieved with absolutely zero load balancing. We simply wake up the process on the CPU that it was previously run. Worst performance is obtained when we do load balancing at wake up. There isn't an easy way to auto detect the workload characteristics. Ingo's earlier patch that detects idle CPU and decide whether to load balance or not doesn't perform with aim7 either since all CPUs are busy (it causes even bigger perf. regression). Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6, which causes more than 10% performance regression with aim7. Signed-off-by: Ken Chen Acked-by: Ingo Molnar Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 +---- kernel/sched.c | 10 +--------- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c1da0269a18..b6f51e3a38ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -697,11 +697,8 @@ struct task_struct { int lock_depth; /* BKL lock depth */ -#if defined(CONFIG_SMP) - int last_waker_cpu; /* CPU that last woke this task up */ -#if defined(__ARCH_WANT_UNLOCKED_CTXSW) +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) int oncpu; -#endif #endif int prio, static_prio; struct list_head run_list; diff --git a/kernel/sched.c b/kernel/sched.c index 87d93be336a1..66d957227de9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1204,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync) } } - if (p->last_waker_cpu != this_cpu) - goto out_set_cpu; - if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out_set_cpu; @@ -1277,8 +1274,6 @@ out_set_cpu: cpu = task_cpu(p); } - p->last_waker_cpu = this_cpu; - out_activate: #endif /* CONFIG_SMP */ if (old_state == TASK_UNINTERRUPTIBLE) { @@ -1360,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags) #ifdef CONFIG_SCHEDSTATS memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif -#if defined(CONFIG_SMP) - p->last_waker_cpu = cpu; -#if defined(__ARCH_WANT_UNLOCKED_CTXSW) +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) p->oncpu = 0; #endif -#endif #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; -- cgit v1.2.3 From 68f624fc8b9fa50de9cc0ebd612ef7b7b9fa32d0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Feb 2006 13:53:18 -0800 Subject: [PATCH] FRV: Miscellaneous fixes Make various alterations and fixes to the FRV arch: (1) Resyncs the FRV system call collection with the i386 arch. (2) Discards __iounmap() as it's not used. (3) Fixes the use of the SWAP/SWAPI instruction to get the arguments the right way around in atomic.h, and also to get the asm constraints correct. (4) Moves copy_to/from_user_page() to asm/cacheflush.h to be consistent with other archs. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/entry.S | 26 +++++++++++++++++++++----- arch/frv/mm/kmap.c | 9 --------- include/asm-frv/atomic.h | 6 +++--- include/asm-frv/cacheflush.h | 12 ++++++++++++ include/asm-frv/io.h | 1 - include/asm-frv/uaccess.h | 3 --- include/asm-frv/unistd.h | 28 ++++++++++++++++++++++------ 7 files changed, 58 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 5f6548388b74..c69d499d28cf 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1418,11 +1418,27 @@ sys_call_table: .long sys_add_key .long sys_request_key .long sys_keyctl - .long sys_ni_syscall // sys_vperfctr_open - .long sys_ni_syscall // sys_vperfctr_control /* 290 */ - .long sys_ni_syscall // sys_vperfctr_unlink - .long sys_ni_syscall // sys_vperfctr_iresume - .long sys_ni_syscall // sys_vperfctr_read + .long sys_ioprio_set + .long sys_ioprio_get /* 290 */ + .long sys_inotify_init + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat /* 295 */ + .long sys_mkdirat + .long sys_mknodat + .long sys_fchownat + .long sys_futimesat + .long sys_newfstatat /* 300 */ + .long sys_unlinkat + .long sys_renameat + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat /* 305 */ + .long sys_fchmodat + .long sys_faccessat + .long sys_pselect6 + .long sys_ppoll syscall_table_size = (. - sys_call_table) diff --git a/arch/frv/mm/kmap.c b/arch/frv/mm/kmap.c index 539f45e6d15e..c54f18e65ea6 100644 --- a/arch/frv/mm/kmap.c +++ b/arch/frv/mm/kmap.c @@ -43,15 +43,6 @@ void iounmap(void *addr) { } -/* - * __iounmap unmaps nearly everything, so be careful - * it doesn't free currently pointer/page tables anymore but it - * wans't used anyway and might be added later. - */ -void __iounmap(void *addr, unsigned long size) -{ -} - /* * Set new cache mode for some kernel address space. * The caller must push data for that range itself, if such data may already diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h index a59f684b4f33..5d9f84bfdcad 100644 --- a/include/asm-frv/atomic.h +++ b/include/asm-frv/atomic.h @@ -220,9 +220,9 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig switch (sizeof(__xg_orig)) { \ case 4: \ asm volatile( \ - "swap%I0 %2,%M0" \ - : "+m"(*__xg_ptr), "=&r"(__xg_orig) \ - : "r"(x) \ + "swap%I0 %M0,%1" \ + : "+m"(*__xg_ptr), "=r"(__xg_orig) \ + : "1"(x) \ : "memory" \ ); \ break; \ diff --git a/include/asm-frv/cacheflush.h b/include/asm-frv/cacheflush.h index 3007deccb490..eaa5826bc1c8 100644 --- a/include/asm-frv/cacheflush.h +++ b/include/asm-frv/cacheflush.h @@ -87,5 +87,17 @@ static inline void flush_icache_page(struct vm_area_struct *vma, struct page *pa flush_icache_user_range(vma, page, page_to_phys(page), PAGE_SIZE); } +/* + * permit ptrace to access another process's address space through the icache + * and the dcache + */ +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { \ + memcpy((dst), (src), (len)); \ + flush_icache_user_range((vma), (page), (vaddr), (len)); \ +} while(0) + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) #endif /* _ASM_CACHEFLUSH_H */ diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h index 075369b1a34b..01247cb2bc39 100644 --- a/include/asm-frv/io.h +++ b/include/asm-frv/io.h @@ -251,7 +251,6 @@ static inline void writel(uint32_t datum, volatile void __iomem *addr) #define IOMAP_WRITETHROUGH 3 extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag); -extern void __iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size) { diff --git a/include/asm-frv/uaccess.h b/include/asm-frv/uaccess.h index b6bcbe01f6ee..a1d140438863 100644 --- a/include/asm-frv/uaccess.h +++ b/include/asm-frv/uaccess.h @@ -306,7 +306,4 @@ extern long strnlen_user(const char *src, long count); extern unsigned long search_exception_table(unsigned long addr); -#define copy_to_user_page(vma, page, vaddr, dst, src, len) memcpy(dst, src, len) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) memcpy(dst, src, len) - #endif /* _ASM_UACCESS_H */ diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index 4d994d2e99e3..322531caa484 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -295,13 +295,29 @@ #define __NR_add_key 286 #define __NR_request_key 287 #define __NR_keyctl 288 -#define __NR_vperfctr_open 289 -#define __NR_vperfctr_control (__NR_perfctr_info+1) -#define __NR_vperfctr_unlink (__NR_perfctr_info+2) -#define __NR_vperfctr_iresume (__NR_perfctr_info+3) -#define __NR_vperfctr_read (__NR_perfctr_info+4) +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_newfstatat 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 -#define NR_syscalls 294 +#define NR_syscalls 310 /* * process the return value of a syscall, consigning it to one of two possible fates -- cgit v1.2.3 From 28baebae73c3ea8b75c7cae225a7db817ab825a9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Feb 2006 13:53:20 -0800 Subject: [PATCH] FRV: Use virtual interrupt disablement Make the FRV arch use virtual interrupt disablement because accesses to the processor status register (PSR) are relatively slow and because we will soon have the need to deal with multiple interrupt controls at the same time (separate h/w and inter-core interrupts). The way this is done is to dedicate one of the four integer condition code registers (ICC2) to maintaining a virtual interrupt disablement state whilst inside the kernel. This uses the ICC2.Z flag (Zero) to indicate whether the interrupts are virtually disabled and the ICC2.C flag (Carry) to indicate whether the interrupts are physically disabled. ICC2.Z is set to indicate interrupts are virtually disabled. ICC2.C is set to indicate interrupts are physically enabled. Under normal running conditions Z==0 and C==1. Disabling interrupts with local_irq_disable() doesn't then actually physically disable interrupts - it merely sets ICC2.Z to 1. Should an interrupt then happen, the exception prologue will note ICC2.Z is set and branch out of line using one instruction (an unlikely BEQ). Here it will physically disable interrupts and clear ICC2.C. When it comes time to enable interrupts (local_irq_enable()), this simply clears the ICC2.Z flag and invokes a trap #2 if both Z and C flags are clear (the HI integer condition). This can be done with the TIHI conditional trap instruction. The trap then physically reenables interrupts and sets ICC2.C again. Upon returning the interrupt will be taken as interrupts will then be enabled. Note that whilst processing the trap, the whole exceptions system is disabled, and so an interrupt can't happen till it returns. If no pending interrupt had happened, ICC2.C would still be set, the HI condition would not be fulfilled, and no trap will happen. Saving interrupts (local_irq_save) is simply a matter of pulling the ICC2.Z flag out of the CCR register, shifting it down and masking it off. This gives a result of 0 if interrupts were enabled and 1 if they weren't. Restoring interrupts (local_irq_restore) is then a matter of taking the saved value mentioned previously and XOR'ing it against 1. If it was one, the result will be zero, and if it was zero the result will be non-zero. This result is then used to affect the ICC2.Z flag directly (it is a condition code flag after all). An XOR instruction does not affect the Carry flag, and so that bit of state is unchanged. The two flags can then be sampled to see if they're both zero using the trap (TIHI) as for the unconditional reenablement (local_irq_enable). This patch also: (1) Modifies the debugging stub (break.S) to handle single-stepping crossing into the trap #2 handler and into virtually disabled interrupts. (2) Removes superseded fixup pointers from the second instructions in the trap tables (there's no a separate fixup table for this). (3) Declares the trap #3 vector for use in .org directives in the trap table. (4) Moves irq_enter() and irq_exit() in do_IRQ() to avoid problems with virtual interrupt handling, and removes the duplicate code that has now been folded into irq_exit() (softirq and preemption handling). (5) Tells the compiler in the arch Makefile that ICC2 is now reserved. (6) Documents the in-kernel ABI, including the virtual interrupts. (7) Renames the old irq management functions to different names. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/fujitsu/frv/kernel-ABI.txt | 234 +++++++++++++++++++++++++++++++ arch/frv/Makefile | 2 +- arch/frv/kernel/break.S | 77 +++++++++- arch/frv/kernel/entry-table.S | 39 +++++- arch/frv/kernel/entry.S | 65 ++++++++- arch/frv/kernel/head.S | 3 + arch/frv/kernel/irq.c | 41 +----- include/asm-frv/spr-regs.h | 1 + include/asm-frv/system.h | 88 +++++++++++- 9 files changed, 489 insertions(+), 61 deletions(-) create mode 100644 Documentation/fujitsu/frv/kernel-ABI.txt (limited to 'include') diff --git a/Documentation/fujitsu/frv/kernel-ABI.txt b/Documentation/fujitsu/frv/kernel-ABI.txt new file mode 100644 index 000000000000..0ed9b0a779bc --- /dev/null +++ b/Documentation/fujitsu/frv/kernel-ABI.txt @@ -0,0 +1,234 @@ + ================================= + INTERNAL KERNEL ABI FOR FR-V ARCH + ================================= + +The internal FRV kernel ABI is not quite the same as the userspace ABI. A number of the registers +are used for special purposed, and the ABI is not consistent between modules vs core, and MMU vs +no-MMU. + +This partly stems from the fact that FRV CPUs do not have a separate supervisor stack pointer, and +most of them do not have any scratch registers, thus requiring at least one general purpose +register to be clobbered in such an event. Also, within the kernel core, it is possible to simply +jump or call directly between functions using a relative offset. This cannot be extended to modules +for the displacement is likely to be too far. Thus in modules the address of a function to call +must be calculated in a register and then used, requiring two extra instructions. + +This document has the following sections: + + (*) System call register ABI + (*) CPU operating modes + (*) Internal kernel-mode register ABI + (*) Internal debug-mode register ABI + (*) Virtual interrupt handling + + +======================== +SYSTEM CALL REGISTER ABI +======================== + +When a system call is made, the following registers are effective: + + REGISTERS CALL RETURN + =============== ======================= ======================= + GR7 System call number Preserved + GR8 Syscall arg #1 Return value + GR9-GR13 Syscall arg #2-6 Preserved + + +=================== +CPU OPERATING MODES +=================== + +The FR-V CPU has three basic operating modes. In order of increasing capability: + + (1) User mode. + + Basic userspace running mode. + + (2) Kernel mode. + + Normal kernel mode. There are many additional control registers available that may be + accessed in this mode, in addition to all the stuff available to user mode. This has two + submodes: + + (a) Exceptions enabled (PSR.T == 1). + + Exceptions will invoke the appropriate normal kernel mode handler. On entry to the + handler, the PSR.T bit will be cleared. + + (b) Exceptions disabled (PSR.T == 0). + + No exceptions or interrupts may happen. Any mandatory exceptions will cause the CPU to + halt unless the CPU is told to jump into debug mode instead. + + (3) Debug mode. + + No exceptions may happen in this mode. Memory protection and management exceptions will be + flagged for later consideration, but the exception handler won't be invoked. Debugging traps + such as hardware breakpoints and watchpoints will be ignored. This mode is entered only by + debugging events obtained from the other two modes. + + All kernel mode registers may be accessed, plus a few extra debugging specific registers. + + +================================= +INTERNAL KERNEL-MODE REGISTER ABI +================================= + +There are a number of permanent register assignments that are set up by entry.S in the exception +prologue. Note that there is a complete set of exception prologues for each of user->kernel +transition and kernel->kernel transition. There are also user->debug and kernel->debug mode +transition prologues. + + + REGISTER FLAVOUR USE + =============== ======= ==================================================== + GR1 Supervisor stack pointer + GR15 Current thread info pointer + GR16 GP-Rel base register for small data + GR28 Current exception frame pointer (__frame) + GR29 Current task pointer (current) + GR30 Destroyed by kernel mode entry + GR31 NOMMU Destroyed by debug mode entry + GR31 MMU Destroyed by TLB miss kernel mode entry + CCR.ICC2 Virtual interrupt disablement tracking + CCCR.CC3 Cleared by exception prologue (atomic op emulation) + SCR0 MMU See mmu-layout.txt. + SCR1 MMU See mmu-layout.txt. + SCR2 MMU Save for EAR0 (destroyed by icache insns in debug mode) + SCR3 MMU Save for GR31 during debug exceptions + DAMR/IAMR NOMMU Fixed memory protection layout. + DAMR/IAMR MMU See mmu-layout.txt. + + +Certain registers are also used or modified across function calls: + + REGISTER CALL RETURN + =============== =============================== =============================== + GR0 Fixed Zero - + GR2 Function call frame pointer + GR3 Special Preserved + GR3-GR7 - Clobbered + GR8 Function call arg #1 Return value (or clobbered) + GR9 Function call arg #2 Return value MSW (or clobbered) + GR10-GR13 Function call arg #3-#6 Clobbered + GR14 - Clobbered + GR15-GR16 Special Preserved + GR17-GR27 - Preserved + GR28-GR31 Special Only accessed explicitly + LR Return address after CALL Clobbered + CCR/CCCR - Mostly Clobbered + + +================================ +INTERNAL DEBUG-MODE REGISTER ABI +================================ + +This is the same as the kernel-mode register ABI for functions calls. The difference is that in +debug-mode there's a different stack and a different exception frame. Almost all the global +registers from kernel-mode (including the stack pointer) may be changed. + + REGISTER FLAVOUR USE + =============== ======= ==================================================== + GR1 Debug stack pointer + GR16 GP-Rel base register for small data + GR31 Current debug exception frame pointer (__debug_frame) + SCR3 MMU Saved value of GR31 + + +Note that debug mode is able to interfere with the kernel's emulated atomic ops, so it must be +exceedingly careful not to do any that would interact with the main kernel in this regard. Hence +the debug mode code (gdbstub) is almost completely self-contained. The only external code used is +the sprintf family of functions. + +Futhermore, break.S is so complicated because single-step mode does not switch off on entry to an +exception. That means unless manually disabled, single-stepping will blithely go on stepping into +things like interrupts. See gdbstub.txt for more information. + + +========================== +VIRTUAL INTERRUPT HANDLING +========================== + +Because accesses to the PSR is so slow, and to disable interrupts we have to access it twice (once +to read and once to write), we don't actually disable interrupts at all if we don't have to. What +we do instead is use the ICC2 condition code flags to note virtual disablement, such that if we +then do take an interrupt, we note the flag, really disable interrupts, set another flag and resume +execution at the point the interrupt happened. Setting condition flags as a side effect of an +arithmetic or logical instruction is really fast. This use of the ICC2 only occurs within the +kernel - it does not affect userspace. + +The flags we use are: + + (*) CCR.ICC2.Z [Zero flag] + + Set to virtually disable interrupts, clear when interrupts are virtually enabled. Can be + modified by logical instructions without affecting the Carry flag. + + (*) CCR.ICC2.C [Carry flag] + + Clear to indicate hardware interrupts are really disabled, set otherwise. + + +What happens is this: + + (1) Normal kernel-mode operation. + + ICC2.Z is 0, ICC2.C is 1. + + (2) An interrupt occurs. The exception prologue examines ICC2.Z and determines that nothing needs + doing. This is done simply with an unlikely BEQ instruction. + + (3) The interrupts are disabled (local_irq_disable) + + ICC2.Z is set to 1. + + (4) If interrupts were then re-enabled (local_irq_enable): + + ICC2.Z would be set to 0. + + A TIHI #2 instruction (trap #2 if condition HI - Z==0 && C==0) would be used to trap if + interrupts were now virtually enabled, but physically disabled - which they're not, so the + trap isn't taken. The kernel would then be back to state (1). + + (5) An interrupt occurs. The exception prologue examines ICC2.Z and determines that the interrupt + shouldn't actually have happened. It jumps aside, and there disabled interrupts by setting + PSR.PIL to 14 and then it clears ICC2.C. + + (6) If interrupts were then saved and disabled again (local_irq_save): + + ICC2.Z would be shifted into the save variable and masked off (giving a 1). + + ICC2.Z would then be set to 1 (thus unchanged), and ICC2.C would be unaffected (ie: 0). + + (7) If interrupts were then restored from state (6) (local_irq_restore): + + ICC2.Z would be set to indicate the result of XOR'ing the saved value (ie: 1) with 1, which + gives a result of 0 - thus leaving ICC2.Z set. + + ICC2.C would remain unaffected (ie: 0). + + A TIHI #2 instruction would be used to again assay the current state, but this would do + nothing as Z==1. + + (8) If interrupts were then enabled (local_irq_enable): + + ICC2.Z would be cleared. ICC2.C would be left unaffected. Both flags would now be 0. + + A TIHI #2 instruction again issued to assay the current state would then trap as both Z==0 + [interrupts virtually enabled] and C==0 [interrupts really disabled] would then be true. + + (9) The trap #2 handler would simply enable hardware interrupts (set PSR.PIL to 0), set ICC2.C to + 1 and return. + +(10) Immediately upon returning, the pending interrupt would be taken. + +(11) The interrupt handler would take the path of actually processing the interrupt (ICC2.Z is + clear, BEQ fails as per step (2)). + +(12) The interrupt handler would then set ICC2.C to 1 since hardware interrupts are definitely + enabled - or else the kernel wouldn't be here. + +(13) On return from the interrupt handler, things would be back to state (1). + +This trap (#2) is only available in kernel mode. In user mode it will result in SIGILL. diff --git a/arch/frv/Makefile b/arch/frv/Makefile index 90c0fb8d9dc3..d163747d17c0 100644 --- a/arch/frv/Makefile +++ b/arch/frv/Makefile @@ -81,7 +81,7 @@ endif # - reserve CC3 for use with atomic ops # - all the extra registers are dealt with only at context switch time CFLAGS += -mno-fdpic -mgpr-32 -msoft-float -mno-media -CFLAGS += -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15 +CFLAGS += -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15 -ffixed-icc2 AFLAGS += -mno-fdpic ASFLAGS += -mno-fdpic diff --git a/arch/frv/kernel/break.S b/arch/frv/kernel/break.S index 33233dc23e29..687c48d62dde 100644 --- a/arch/frv/kernel/break.S +++ b/arch/frv/kernel/break.S @@ -200,12 +200,20 @@ __break_step: movsg bpcsr,gr2 sethi.p %hi(__entry_kernel_external_interrupt),gr3 setlo %lo(__entry_kernel_external_interrupt),gr3 - subcc gr2,gr3,gr0,icc0 + subcc.p gr2,gr3,gr0,icc0 + sethi %hi(__entry_uspace_external_interrupt),gr3 + setlo.p %lo(__entry_uspace_external_interrupt),gr3 beq icc0,#2,__break_step_kernel_external_interrupt - sethi.p %hi(__entry_uspace_external_interrupt),gr3 - setlo %lo(__entry_uspace_external_interrupt),gr3 - subcc gr2,gr3,gr0,icc0 + subcc.p gr2,gr3,gr0,icc0 + sethi %hi(__entry_kernel_external_interrupt_virtually_disabled),gr3 + setlo.p %lo(__entry_kernel_external_interrupt_virtually_disabled),gr3 beq icc0,#2,__break_step_uspace_external_interrupt + subcc.p gr2,gr3,gr0,icc0 + sethi %hi(__entry_kernel_external_interrupt_virtual_reenable),gr3 + setlo.p %lo(__entry_kernel_external_interrupt_virtual_reenable),gr3 + beq icc0,#2,__break_step_kernel_external_interrupt_virtually_disabled + subcc gr2,gr3,gr0,icc0 + beq icc0,#2,__break_step_kernel_external_interrupt_virtual_reenable LEDS 0x2007,gr2 @@ -254,6 +262,9 @@ __break_step_kernel_softprog_interrupt: # step through an external interrupt from kernel mode .globl __break_step_kernel_external_interrupt __break_step_kernel_external_interrupt: + # deal with virtual interrupt disablement + beq icc2,#0,__break_step_kernel_external_interrupt_virtually_disabled + sethi.p %hi(__entry_kernel_external_interrupt_reentry),gr3 setlo %lo(__entry_kernel_external_interrupt_reentry),gr3 @@ -294,6 +305,64 @@ __break_return_as_kernel_prologue: #endif rett #1 +# we single-stepped into an interrupt handler whilst interrupts were merely virtually disabled +# need to really disable interrupts, set flag, fix up and return +__break_step_kernel_external_interrupt_virtually_disabled: + movsg psr,gr2 + andi gr2,#~PSR_PIL,gr2 + ori gr2,#PSR_PIL_14,gr2 /* debugging interrupts only */ + movgs gr2,psr + + ldi @(gr31,#REG_CCR),gr3 + movgs gr3,ccr + subcc.p gr0,gr0,gr0,icc2 /* leave Z set, clear C */ + + # exceptions must've been enabled and we must've been in supervisor mode + setlos BPSR_BET|BPSR_BS,gr3 + movgs gr3,bpsr + + # return to where the interrupt happened + movsg pcsr,gr2 + movgs gr2,bpcsr + + lddi.p @(gr31,#REG_GR(2)),gr2 + + xor gr31,gr31,gr31 + movgs gr0,brr +#ifdef CONFIG_MMU + movsg scr3,gr31 +#endif + rett #1 + +# we stepped through into the virtual interrupt reenablement trap +# +# we also want to single step anyway, but after fixing up so that we get an event on the +# instruction after the broken-into exception returns + .globl __break_step_kernel_external_interrupt_virtual_reenable +__break_step_kernel_external_interrupt_virtual_reenable: + movsg psr,gr2 + andi gr2,#~PSR_PIL,gr2 + movgs gr2,psr + + ldi @(gr31,#REG_CCR),gr3 + movgs gr3,ccr + subicc gr0,#1,gr0,icc2 /* clear Z, set C */ + + # save the adjusted ICC2 + movsg ccr,gr3 + sti gr3,@(gr31,#REG_CCR) + + # exceptions must've been enabled and we must've been in supervisor mode + setlos BPSR_BET|BPSR_BS,gr3 + movgs gr3,bpsr + + # return to where the trap happened + movsg pcsr,gr2 + movgs gr2,bpcsr + + # and then process the single step + bra __break_continue + # step through an internal exception from uspace mode .globl __break_step_uspace_softprog_interrupt __break_step_uspace_softprog_interrupt: diff --git a/arch/frv/kernel/entry-table.S b/arch/frv/kernel/entry-table.S index 9b9243e2103c..81568acea9cd 100644 --- a/arch/frv/kernel/entry-table.S +++ b/arch/frv/kernel/entry-table.S @@ -116,6 +116,8 @@ __break_kerneltrap_fixup_table: .long __break_step_uspace_external_interrupt .section .trap.kernel .org \tbr_tt + # deal with virtual interrupt disablement + beq icc2,#0,__entry_kernel_external_interrupt_virtually_disabled bra __entry_kernel_external_interrupt .section .trap.fixup.kernel .org \tbr_tt >> 2 @@ -259,25 +261,52 @@ __trap_fixup_kernel_data_tlb_miss: .org TBR_TT_TRAP0 .rept 127 bra __entry_uspace_softprog_interrupt - bra __break_step_uspace_softprog_interrupt - .long 0,0 + .long 0,0,0 .endr .org TBR_TT_BREAK bra __entry_break .long 0,0,0 + .section .trap.fixup.user + .org TBR_TT_TRAP0 >> 2 + .rept 127 + .long __break_step_uspace_softprog_interrupt + .endr + .org TBR_TT_BREAK >> 2 + .long 0 + # miscellaneous kernel mode entry points .section .trap.kernel .org TBR_TT_TRAP0 - .rept 127 bra __entry_kernel_softprog_interrupt - bra __break_step_kernel_softprog_interrupt - .long 0,0 + .org TBR_TT_TRAP1 + bra __entry_kernel_softprog_interrupt + + # trap #2 in kernel - reenable interrupts + .org TBR_TT_TRAP2 + bra __entry_kernel_external_interrupt_virtual_reenable + + # miscellaneous kernel traps + .org TBR_TT_TRAP3 + .rept 124 + bra __entry_kernel_softprog_interrupt + .long 0,0,0 .endr .org TBR_TT_BREAK bra __entry_break .long 0,0,0 + .section .trap.fixup.kernel + .org TBR_TT_TRAP0 >> 2 + .long __break_step_kernel_softprog_interrupt + .long __break_step_kernel_softprog_interrupt + .long __break_step_kernel_external_interrupt_virtual_reenable + .rept 124 + .long __break_step_kernel_softprog_interrupt + .endr + .org TBR_TT_BREAK >> 2 + .long 0 + # miscellaneous debug mode entry points .section .trap.break .org TBR_TT_BREAK diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index c69d499d28cf..1d21c8d34d8a 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -141,7 +141,10 @@ __entry_uspace_external_interrupt_reentry: movsg gner0,gr4 movsg gner1,gr5 - stdi gr4,@(gr28,#REG_GNER0) + stdi.p gr4,@(gr28,#REG_GNER0) + + # interrupts start off fully disabled in the interrupt handler + subcc gr0,gr0,gr0,icc2 /* set Z and clear C */ # set up kernel global registers sethi.p %hi(__kernel_current_task),gr5 @@ -193,9 +196,8 @@ __entry_uspace_external_interrupt_reentry: .type __entry_kernel_external_interrupt,@function __entry_kernel_external_interrupt: LEDS 0x6210 - - sub sp,gr15,gr31 - LEDS32 +// sub sp,gr15,gr31 +// LEDS32 # set up the stack pointer or.p sp,gr0,gr30 @@ -231,7 +233,10 @@ __entry_kernel_external_interrupt_reentry: stdi gr24,@(gr28,#REG_GR(24)) stdi gr26,@(gr28,#REG_GR(26)) sti gr29,@(gr28,#REG_GR(29)) - stdi gr30,@(gr28,#REG_GR(30)) + stdi.p gr30,@(gr28,#REG_GR(30)) + + # note virtual interrupts will be fully enabled upon return + subicc gr0,#1,gr0,icc2 /* clear Z, set C */ movsg tbr ,gr20 movsg psr ,gr22 @@ -267,7 +272,10 @@ __entry_kernel_external_interrupt_reentry: movsg gner0,gr4 movsg gner1,gr5 - stdi gr4,@(gr28,#REG_GNER0) + stdi.p gr4,@(gr28,#REG_GNER0) + + # interrupts start off fully disabled in the interrupt handler + subcc gr0,gr0,gr0,icc2 /* set Z and clear C */ # set the return address sethi.p %hi(__entry_return_from_kernel_interrupt),gr4 @@ -291,6 +299,45 @@ __entry_kernel_external_interrupt_reentry: .size __entry_kernel_external_interrupt,.-__entry_kernel_external_interrupt +############################################################################### +# +# deal with interrupts that were actually virtually disabled +# - we need to really disable them, flag the fact and return immediately +# - if you change this, you must alter break.S also +# +############################################################################### + .balign L1_CACHE_BYTES + .globl __entry_kernel_external_interrupt_virtually_disabled + .type __entry_kernel_external_interrupt_virtually_disabled,@function +__entry_kernel_external_interrupt_virtually_disabled: + movsg psr,gr30 + andi gr30,#~PSR_PIL,gr30 + ori gr30,#PSR_PIL_14,gr30 ; debugging interrupts only + movgs gr30,psr + subcc gr0,gr0,gr0,icc2 ; leave Z set, clear C + rett #0 + + .size __entry_kernel_external_interrupt_virtually_disabled,.-__entry_kernel_external_interrupt_virtually_disabled + +############################################################################### +# +# deal with re-enablement of interrupts that were pending when virtually re-enabled +# - set ICC2.C, re-enable the real interrupts and return +# - we can clear ICC2.Z because we shouldn't be here if it's not 0 [due to TIHI] +# - if you change this, you must alter break.S also +# +############################################################################### + .balign L1_CACHE_BYTES + .globl __entry_kernel_external_interrupt_virtual_reenable + .type __entry_kernel_external_interrupt_virtual_reenable,@function +__entry_kernel_external_interrupt_virtual_reenable: + movsg psr,gr30 + andi gr30,#~PSR_PIL,gr30 ; re-enable interrupts + movgs gr30,psr + subicc gr0,#1,gr0,icc2 ; clear Z, set C + rett #0 + + .size __entry_kernel_external_interrupt_virtual_reenable,.-__entry_kernel_external_interrupt_virtual_reenable ############################################################################### # @@ -335,6 +382,7 @@ __entry_uspace_softprog_interrupt_reentry: sethi.p %hi(__entry_return_from_user_exception),gr23 setlo %lo(__entry_return_from_user_exception),gr23 + bra __entry_common .size __entry_uspace_softprog_interrupt,.-__entry_uspace_softprog_interrupt @@ -495,7 +543,10 @@ __entry_common: movsg gner0,gr4 movsg gner1,gr5 - stdi gr4,@(gr28,#REG_GNER0) + stdi.p gr4,@(gr28,#REG_GNER0) + + # set up virtual interrupt disablement + subicc gr0,#1,gr0,icc2 /* clear Z flag, set C flag */ # set up kernel global registers sethi.p %hi(__kernel_current_task),gr5 diff --git a/arch/frv/kernel/head.S b/arch/frv/kernel/head.S index c73b4fe9f6ca..29a5265489b7 100644 --- a/arch/frv/kernel/head.S +++ b/arch/frv/kernel/head.S @@ -513,6 +513,9 @@ __head_mmu_enabled: movgs gr0,ccr movgs gr0,cccr + # initialise the virtual interrupt handling + subcc gr0,gr0,gr0,icc2 /* set Z, clear C */ + #ifdef CONFIG_MMU movgs gr3,scr2 movgs gr3,scr3 diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 59580c59c62c..27ab4c30aac6 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -287,18 +287,11 @@ asmlinkage void do_IRQ(void) struct irq_source *source; int level, cpu; + irq_enter(); + level = (__frame->tbr >> 4) & 0xf; cpu = smp_processor_id(); -#if 0 - { - static u32 irqcount; - *(volatile u32 *) 0xe1200004 = ~((irqcount++ << 8) | level); - *(volatile u16 *) 0xffc00100 = (u16) ~0x9999; - mb(); - } -#endif - if ((unsigned long) __frame - (unsigned long) (current + 1) < 512) BUG(); @@ -308,40 +301,12 @@ asmlinkage void do_IRQ(void) kstat_this_cpu.irqs[level]++; - irq_enter(); - for (source = frv_irq_levels[level].sources; source; source = source->next) source->doirq(source); - irq_exit(); - __clr_MASK(level); - /* only process softirqs if we didn't interrupt another interrupt handler */ - if ((__frame->psr & PSR_PIL) == PSR_PIL_0) - if (local_softirq_pending()) - do_softirq(); - -#ifdef CONFIG_PREEMPT - local_irq_disable(); - while (--current->preempt_count == 0) { - if (!(__frame->psr & PSR_S) || - current->need_resched == 0 || - in_interrupt()) - break; - current->preempt_count++; - local_irq_enable(); - preempt_schedule(); - local_irq_disable(); - } -#endif - -#if 0 - { - *(volatile u16 *) 0xffc00100 = (u16) ~0x6666; - mb(); - } -#endif + irq_exit(); } /* end do_IRQ() */ diff --git a/include/asm-frv/spr-regs.h b/include/asm-frv/spr-regs.h index ef472f058d9c..c2a541ef828d 100644 --- a/include/asm-frv/spr-regs.h +++ b/include/asm-frv/spr-regs.h @@ -98,6 +98,7 @@ #define TBR_TT_TRAP0 (0x80 << 4) #define TBR_TT_TRAP1 (0x81 << 4) #define TBR_TT_TRAP2 (0x82 << 4) +#define TBR_TT_TRAP3 (0x83 << 4) #define TBR_TT_TRAP126 (0xfe << 4) #define TBR_TT_BREAK (0xff << 4) diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h index d2aea70a5f64..f72ff0c4dc0b 100644 --- a/include/asm-frv/system.h +++ b/include/asm-frv/system.h @@ -40,8 +40,84 @@ do { \ /* * interrupt flag manipulation + * - use virtual interrupt management since touching the PSR is slow + * - ICC2.Z: T if interrupts virtually disabled + * - ICC2.C: F if interrupts really disabled + * - if Z==1 upon interrupt: + * - C is set to 0 + * - interrupts are really disabled + * - entry.S returns immediately + * - uses TIHI (TRAP if Z==0 && C==0) #2 to really reenable interrupts + * - if taken, the trap: + * - sets ICC2.C + * - enables interrupts */ -#define local_irq_disable() \ +#define local_irq_disable() \ +do { \ + /* set Z flag, but don't change the C flag */ \ + asm volatile(" andcc gr0,gr0,gr0,icc2 \n" \ + : \ + : \ + : "memory", "icc2" \ + ); \ +} while(0) + +#define local_irq_enable() \ +do { \ + /* clear Z flag and then test the C flag */ \ + asm volatile(" oricc gr0,#1,gr0,icc2 \n" \ + " tihi icc2,gr0,#2 \n" \ + : \ + : \ + : "memory", "icc2" \ + ); \ +} while(0) + +#define local_save_flags(flags) \ +do { \ + typecheck(unsigned long, flags); \ + asm volatile("movsg ccr,%0" \ + : "=r"(flags) \ + : \ + : "memory"); \ + \ + /* shift ICC2.Z to bit 0 */ \ + flags >>= 26; \ + \ + /* make flags 1 if interrupts disabled, 0 otherwise */ \ + flags &= 1UL; \ +} while(0) + +#define irqs_disabled() \ + ({unsigned long flags; local_save_flags(flags); flags; }) + +#define local_irq_save(flags) \ +do { \ + typecheck(unsigned long, flags); \ + local_save_flags(flags); \ + local_irq_disable(); \ +} while(0) + +#define local_irq_restore(flags) \ +do { \ + typecheck(unsigned long, flags); \ + \ + /* load the Z flag by turning 1 if disabled into 0 if disabled \ + * and thus setting the Z flag but not the C flag */ \ + asm volatile(" xoricc %0,#1,gr0,icc2 \n" \ + /* then test Z=0 and C=0 */ \ + " tihi icc2,gr0,#2 \n" \ + : \ + : "r"(flags) \ + : "memory", "icc2" \ + ); \ + \ +} while(0) + +/* + * real interrupt flag manipulation + */ +#define __local_irq_disable() \ do { \ unsigned long psr; \ asm volatile(" movsg psr,%0 \n" \ @@ -53,7 +129,7 @@ do { \ : "memory"); \ } while(0) -#define local_irq_enable() \ +#define __local_irq_enable() \ do { \ unsigned long psr; \ asm volatile(" movsg psr,%0 \n" \ @@ -64,7 +140,7 @@ do { \ : "memory"); \ } while(0) -#define local_save_flags(flags) \ +#define __local_save_flags(flags) \ do { \ typecheck(unsigned long, flags); \ asm("movsg psr,%0" \ @@ -73,7 +149,7 @@ do { \ : "memory"); \ } while(0) -#define local_irq_save(flags) \ +#define __local_irq_save(flags) \ do { \ unsigned long npsr; \ typecheck(unsigned long, flags); \ @@ -86,7 +162,7 @@ do { \ : "memory"); \ } while(0) -#define local_irq_restore(flags) \ +#define __local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ asm volatile(" movgs %0,psr \n" \ @@ -95,7 +171,7 @@ do { \ : "memory"); \ } while(0) -#define irqs_disabled() \ +#define __irqs_disabled() \ ((__get_PSR() & PSR_PIL) >= PSR_PIL_14) /* -- cgit v1.2.3 From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 01:34:23 -0800 Subject: [NETFILTER]: Fix xfrm lookup after SNAT To find out if a packet needs to be handled by IPsec after SNAT, packets are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This breaks SNAT of non-unicast packets to non-local addresses because the packet is routed as incoming packet and no neighbour entry is bound to the dst_entry. In general, it seems to be a bad idea to replace the dst_entry after the packet was already sent to the output routine because its state might not match what's expected. This patch changes the xfrm lookup in POST_ROUTING to re-use the original dst_entry without routing the packet again. This means no policy routing can be used for transport mode transforms (which keep the original route) when packets are SNATed to match the policy, but it looks like the best we can do for now. Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4.h | 2 +- net/ipv4/netfilter.c | 41 ++++++++++++++++++++++++++++++++++ net/ipv4/netfilter/ip_nat_standalone.c | 6 ++--- 3 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fdc4a9527343..43c09d790b83 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -79,7 +79,7 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); - +extern int ip_xfrm_me_harder(struct sk_buff **pskb); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 52a3d7c57907..ed42cdc57cd9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +#ifdef CONFIG_XFRM +int ip_xfrm_me_harder(struct sk_buff **pskb) +{ + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + return 0; + if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + return -1; + + dst = (*pskb)->dst; + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + return -1; + + dst_release((*pskb)->dst); + (*pskb)->dst = dst; + + /* Change in oif may mean change in hh_len. */ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + return 0; +} +EXPORT_SYMBOL(ip_xfrm_me_harder); +#endif + void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(ip_nat_decode_session); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 92c54999a19d..7c3f7d380240 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum, return NF_ACCEPT; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); +#ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.ip != ct->tuplehash[!dir].tuple.dst.ip -#ifdef CONFIG_XFRM || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all -#endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; } +#endif return ret; } -- cgit v1.2.3 From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 15 Feb 2006 22:50:10 +0300 Subject: [PATCH] fix zap_thread's ptrace related problems 1. The tracee can go from ptrace_stop() to do_signal_stop() after __ptrace_unlink(p). 2. It is unsafe to __ptrace_unlink(p) while p->parent may wait for tasklist_lock in ptrace_detach(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Eric W. Biederman Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/ptrace.h | 1 + kernel/ptrace.c | 25 +++++++++++++++---------- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 055378d2513e..0e1c95074d42 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm) do_each_thread(g,p) { if (mm == p->mm && p != tsk && p->ptrace && p->parent->mm == mm) { - __ptrace_unlink(p); + __ptrace_detach(p, 0); } } while_each_thread(g,p); write_unlock_irq(&tasklist_lock); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 9d5cd106b344..0d36750fc0f1 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); extern int ptrace_detach(struct task_struct *, unsigned int); +extern void __ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); extern int ptrace_request(struct task_struct *child, long request, long addr, long data); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d2cf144d0af5..d95a72c9279d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child) */ void __ptrace_unlink(task_t *child) { - if (!child->ptrace) - BUG(); + BUG_ON(!child->ptrace); + child->ptrace = 0; if (!list_empty(&child->ptrace_list)) { list_del_init(&child->ptrace_list); @@ -184,22 +184,27 @@ bad: return retval; } +void __ptrace_detach(struct task_struct *child, unsigned int data) +{ + child->exit_code = data; + /* .. re-parent .. */ + __ptrace_unlink(child); + /* .. and wake it up. */ + if (child->exit_state != EXIT_ZOMBIE) + wake_up_process(child); +} + int ptrace_detach(struct task_struct *child, unsigned int data) { if (!valid_signal(data)) - return -EIO; + return -EIO; /* Architecture-specific hardware disable .. */ ptrace_disable(child); - /* .. re-parent .. */ - child->exit_code = data; - write_lock_irq(&tasklist_lock); - __ptrace_unlink(child); - /* .. and wake it up. */ - if (child->exit_state != EXIT_ZOMBIE) - wake_up_process(child); + if (child->ptrace) + __ptrace_detach(child, data); write_unlock_irq(&tasklist_lock); return 0; -- cgit v1.2.3 From 8ed9b2c7a804335004e4bd3b4c6989c5b6bc243f Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 13 Feb 2006 05:29:57 -0500 Subject: [IA64-SGI] sn2 minor fixes and cleanups General SN2 code cleanup: - Do not initialize global variables to zero - Use kzalloc instead of kmalloc+memset - Check kmalloc return values - Do not obfuscate spin lock calls - Remove some unused code - Various formatting cleanups Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 96 ++++++++++++++------------------- arch/ia64/sn/kernel/sn2/prominfo_proc.c | 25 ++++----- arch/ia64/sn/kernel/sn2/sn2_smp.c | 35 ++++++------ arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 22 ++++---- arch/ia64/sn/kernel/tiocx.c | 4 +- arch/ia64/sn/pci/pci_dma.c | 16 +++--- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 29 +++------- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 14 ++--- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 9 ++-- include/asm-ia64/sn/bte.h | 6 +-- include/asm-ia64/sn/pcibr_provider.h | 14 +---- include/asm-ia64/sn/sn_feature_sets.h | 3 -- 12 files changed, 111 insertions(+), 162 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 3437c2390429..dfb3f2902379 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -23,6 +23,10 @@ #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); + static struct list_head sn_sysdata_list; /* sysdata list struct */ @@ -40,12 +44,12 @@ struct brick { struct slab_info slab_info[MAX_SLABS + 1]; }; -int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ +int sn_ioif_inited; /* SN I/O infrastructure initialized? */ struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ -static int max_segment_number = 0; /* Default highest segment number */ -static int max_pcibus_number = 255; /* Default highest pci bus number */ +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ /* * Hooks and struct for unsupported pci providers @@ -84,7 +88,6 @@ static inline u64 sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -94,7 +97,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, (u64) nasid, (u64) widget_num, (u64) device_num, (u64) address, 0, 0, 0); return ret_stuff.status; - } /* @@ -102,7 +104,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, */ static inline u64 sal_get_hubdev_info(u64 handle, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -118,7 +119,6 @@ static inline u64 sal_get_hubdev_info(u64 handle, u64 address) */ static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -215,7 +215,7 @@ static void __init sn_fixup_ionodes(void) struct hubdev_info *hubdev; u64 status; u64 nasid; - int i, widget, device; + int i, widget, device, size; /* * Get SGI Specific HUB chipset information. @@ -251,48 +251,37 @@ static void __init sn_fixup_ionodes(void) if (!hubdev->hdi_flush_nasid_list.widget_p) continue; + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); hubdev->hdi_flush_nasid_list.widget_p = - kmalloc((HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *), - GFP_KERNEL); - memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0, - (HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *)); + kzalloc(size, GFP_KERNEL); + if (!hubdev->hdi_flush_nasid_list.widget_p) + BUG(); for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { - sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET * - sizeof(struct - sn_flush_device_kernel), - GFP_KERNEL); + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); if (!sn_flush_device_kernel) BUG(); - memset(sn_flush_device_kernel, 0x0, - DEV_PER_WIDGET * - sizeof(struct sn_flush_device_kernel)); dev_entry = sn_flush_device_kernel; for (device = 0; device < DEV_PER_WIDGET; device++,dev_entry++) { - dev_entry->common = kmalloc(sizeof(struct - sn_flush_device_common), - GFP_KERNEL); + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); if (!dev_entry->common) BUG(); - memset(dev_entry->common, 0x0, sizeof(struct - sn_flush_device_common)); if (sn_prom_feature_available( PRF_DEVICE_FLUSH_LIST)) status = sal_get_device_dmaflush_list( - nasid, - widget, - device, - (u64)(dev_entry->common)); + nasid, widget, device, + (u64)(dev_entry->common)); else status = sn_device_fixup_war(nasid, - widget, - device, - dev_entry->common); + widget, device, + dev_entry->common); if (status != SALRET_OK) panic("SAL call failed: %s\n", ia64_sal_strerror(status)); @@ -383,13 +372,12 @@ void sn_pci_fixup_slot(struct pci_dev *dev) pci_dev_get(dev); /* for the sysdata pointer */ pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); - if (pcidev_info <= 0) + if (!pcidev_info) BUG(); /* Cannot afford to run out of memory */ - sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL); - if (sn_irq_info <= 0) + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) BUG(); /* Cannot afford to run out of memory */ - memset(sn_irq_info, 0, sizeof(struct sn_irq_info)); /* Call to retrieve pci device information needed by kernel. */ status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, @@ -482,13 +470,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev) */ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { - int status = 0; + int status; int nasid, cnode; struct pci_controller *controller; struct sn_pci_controller *sn_controller; struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; - void *provider_soft = NULL; + void *provider_soft; struct sn_pcibus_provider *provider; status = sal_get_pcibus_info((u64) segment, (u64) busnum, @@ -535,6 +523,8 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) bus->sysdata = controller; if (provider->bus_fixup) provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller); + else + provider_soft = NULL; if (provider_soft == NULL) { /* fixup failed or not applicable */ @@ -638,13 +628,8 @@ void sn_bus_free_sysdata(void) static int __init sn_pci_init(void) { - int i = 0; - int j = 0; + int i, j; struct pci_dev *pci_dev = NULL; - extern void sn_init_cpei_timer(void); -#ifdef CONFIG_PROC_FS - extern void register_sn_procfs(void); -#endif if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) return 0; @@ -700,32 +685,29 @@ static int __init sn_pci_init(void) */ void hubdev_init_node(nodepda_t * npda, cnodeid_t node) { - struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); if (node >= num_online_nodes()) /* Headless/memless IO nodes */ - hubdev_info = - (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0), - sizeof(struct - hubdev_info)); + pg = NODE_DATA(0); else - hubdev_info = - (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node), - sizeof(struct - hubdev_info)); - npda->pdinfo = (void *)hubdev_info; + pg = NODE_DATA(node); + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; } geoid_t cnodeid_get_geoid(cnodeid_t cnode) { - struct hubdev_info *hubdev; hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); return hubdev->hdi_geoid; - } subsys_initcall(sn_pci_init); diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c index 81c63b2f8ae9..6ae276d5d50c 100644 --- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc. All Rights Reserved. * * Module to export the system's Firmware Interface Tables, including * PROM revision numbers and banners, in /proc @@ -190,7 +190,7 @@ static int read_version_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = 0; + int len; /* data holds the NASID of the node */ len = dump_version(page, (unsigned long)data); @@ -202,7 +202,7 @@ static int read_fit_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = 0; + int len; /* data holds the NASID of the node */ len = dump_fit(page, (unsigned long)data); @@ -229,13 +229,16 @@ int __init prominfo_init(void) struct proc_dir_entry *p; cnodeid_t cnodeid; unsigned long nasid; + int size; char name[NODE_NAME_LEN]; if (!ia64_platform_is("sn2")) return 0; - proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *), - GFP_KERNEL); + size = num_online_nodes() * sizeof(struct proc_dir_entry *); + proc_entries = kzalloc(size, GFP_KERNEL); + if (!proc_entries) + return -ENOMEM; sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); @@ -244,14 +247,12 @@ int __init prominfo_init(void) sprintf(name, "node%d", cnodeid); *entp = proc_mkdir(name, sgi_prominfo_entry); nasid = cnodeid_to_nasid(cnodeid); - p = create_proc_read_entry( - "fit", 0, *entp, read_fit_entry, - (void *)nasid); + p = create_proc_read_entry("fit", 0, *entp, read_fit_entry, + (void *)nasid); if (p) p->owner = THIS_MODULE; - p = create_proc_read_entry( - "version", 0, *entp, read_version_entry, - (void *)nasid); + p = create_proc_read_entry("version", 0, *entp, + read_version_entry, (void *)nasid); if (p) p->owner = THIS_MODULE; entp++; @@ -263,7 +264,7 @@ int __init prominfo_init(void) void __exit prominfo_exit(void) { struct proc_dir_entry **entp; - unsigned cnodeid; + unsigned int cnodeid; char name[NODE_NAME_LEN]; entp = proc_entries; diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index f153a4c35c70..24eefb2fc55f 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -46,8 +46,14 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); -void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned long *, unsigned long, - volatile unsigned long *, unsigned long); +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(short *, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); /* * Note: some is the following is captured here to make degugging easier @@ -59,16 +65,6 @@ void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned lon #define reset_max_active_on_deadlock() 1 #define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock) -static inline void ptc_lock(int sh1, unsigned long *flagp) -{ - spin_lock_irqsave(PTC_LOCK(sh1), *flagp); -} - -static inline void ptc_unlock(int sh1, unsigned long flags) -{ - spin_unlock_irqrestore(PTC_LOCK(sh1), flags); -} - struct ptc_stats { unsigned long ptc_l; unsigned long change_rid; @@ -82,6 +78,8 @@ struct ptc_stats { unsigned long shub_ptc_flushes_not_my_mm; }; +#define sn2_ptctest 0 + static inline unsigned long wait_piowc(void) { volatile unsigned long *piows; @@ -200,7 +198,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, max_active = max_active_pio(shub1); itc = ia64_get_itc(); - ptc_lock(shub1, &flags); + spin_lock_irqsave(PTC_LOCK(shub1), flags); itc2 = ia64_get_itc(); __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; @@ -258,7 +256,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, ia64_srlz_d(); } - ptc_unlock(shub1, flags); + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); preempt_enable(); } @@ -270,11 +268,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */ -void sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, volatile unsigned long *ptc0, unsigned long data0, - volatile unsigned long *ptc1, unsigned long data1) + +void +sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, + volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) { - extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, - volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); short nasid, i; unsigned long *piows, zeroval, n; diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index a06719d752a0..c686d9c12f7b 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -6,11 +6,11 @@ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include -#include #ifdef CONFIG_PROC_FS #include #include +#include #include static int partition_id_show(struct seq_file *s, void *p) @@ -90,10 +90,10 @@ static int coherence_id_open(struct inode *inode, struct file *file) return single_open(file, coherence_id_show, NULL); } -static struct proc_dir_entry *sn_procfs_create_entry( - const char *name, struct proc_dir_entry *parent, - int (*openfunc)(struct inode *, struct file *), - int (*releasefunc)(struct inode *, struct file *)) +static struct proc_dir_entry +*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent, + int (*openfunc)(struct inode *, struct file *), + int (*releasefunc)(struct inode *, struct file *)) { struct proc_dir_entry *e = create_proc_entry(name, 0444, parent); @@ -126,24 +126,24 @@ void register_sn_procfs(void) return; sn_procfs_create_entry("partition_id", sgi_proc_dir, - partition_id_open, single_release); + partition_id_open, single_release); sn_procfs_create_entry("system_serial_number", sgi_proc_dir, - system_serial_number_open, single_release); + system_serial_number_open, single_release); sn_procfs_create_entry("licenseID", sgi_proc_dir, - licenseID_open, single_release); + licenseID_open, single_release); e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, - sn_force_interrupt_open, single_release); + sn_force_interrupt_open, single_release); if (e) e->proc_fops->write = sn_force_interrupt_write_proc; sn_procfs_create_entry("coherence_id", sgi_proc_dir, - coherence_id_open, single_release); + coherence_id_open, single_release); sn_procfs_create_entry("sn_topology", sgi_proc_dir, - sn_topology_open, sn_topology_release); + sn_topology_open, sn_topology_release); } #endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index d263d3e8fbb9..8a56f8b5ffa2 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -284,12 +284,10 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, if ((nasid & 1) == 0) return NULL; - sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL); + sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL); if (sn_irq_info == NULL) return NULL; - memset(sn_irq_info, 0x0, sn_irq_size); - status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, req_nasid, slice); if (status) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 5a36292388eb..b4b84c269210 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -335,10 +335,10 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, - pci_domain_nr(bus), bus->number, - 0, /* io */ - 0, /* read */ - port, size, __pa(val)); + pci_domain_nr(bus), bus->number, + 0, /* io */ + 0, /* read */ + port, size, __pa(val)); if (isrv.status == 0) return size; @@ -381,10 +381,10 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, - pci_domain_nr(bus), bus->number, - 0, /* io */ - 1, /* write */ - port, size, __pa(&val)); + pci_domain_nr(bus), bus->number, + 0, /* io */ + 1, /* write */ + port, size, __pa(&val)); if (isrv.status == 0) return size; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index aa3fa5152a32..1f0253bfe0a0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. */ #include @@ -12,7 +12,7 @@ #include #include -int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ +int pcibr_invalidate_ate; /* by default don't invalidate ATE on free */ /* * mark_ate: Mark the ate as either free or inuse. @@ -20,14 +20,12 @@ int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ static void mark_ate(struct ate_resource *ate_resource, int start, int number, u64 value) { - u64 *ate = ate_resource->ate; int index; int length = 0; for (index = start; length < number; index++, length++) ate[index] = value; - } /* @@ -37,7 +35,6 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number, static int find_free_ate(struct ate_resource *ate_resource, int start, int count) { - u64 *ate = ate_resource->ate; int index; int start_free; @@ -70,12 +67,10 @@ static int find_free_ate(struct ate_resource *ate_resource, int start, static inline void free_ate_resource(struct ate_resource *ate_resource, int start) { - mark_ate(ate_resource, start, ate_resource->ate[start], 0); if ((ate_resource->lowest_free_index > start) || (ate_resource->lowest_free_index < 0)) ate_resource->lowest_free_index = start; - } /* @@ -84,7 +79,6 @@ static inline void free_ate_resource(struct ate_resource *ate_resource, static inline int alloc_ate_resource(struct ate_resource *ate_resource, int ate_needed) { - int start_index; /* @@ -118,19 +112,12 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource, */ int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) { - int status = 0; - u64 flag; + int status; + unsigned long flags; - flag = pcibr_lock(pcibus_info); + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count); - - if (status < 0) { - /* Failed to allocate */ - pcibr_unlock(pcibus_info, flag); - return -1; - } - - pcibr_unlock(pcibus_info, flag); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); return status; } @@ -182,7 +169,7 @@ void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V)); } - flags = pcibr_lock(pcibus_info); + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); free_ate_resource(&pcibus_info->pbi_int_ate_resource, index); - pcibr_unlock(pcibus_info, flags); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 54ce5b7ceed2..9f86bb6519aa 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -137,14 +137,12 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, pci_addr |= PCI64_ATTR_VIRTUAL; return pci_addr; - } static dma_addr_t pcibr_dmatrans_direct32(struct pcidev_info * info, u64 paddr, size_t req_size, u64 flags) { - struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> pdi_pcibus_info; @@ -171,7 +169,6 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, } return PCI32_DIRECT_BASE | offset; - } /* @@ -218,9 +215,8 @@ void sn_dma_flush(u64 addr) u64 flags; u64 itte; struct hubdev_info *hubinfo; - volatile struct sn_flush_device_kernel *p; - volatile struct sn_flush_device_common *common; - + struct sn_flush_device_kernel *p; + struct sn_flush_device_common *common; struct sn_flush_nasid_entry *flush_nasid_list; if (!sn_ioif_inited) @@ -310,8 +306,7 @@ void sn_dma_flush(u64 addr) (common->sfdl_slot - 1)); } } else { - spin_lock_irqsave((spinlock_t *)&p->sfdl_flush_lock, - flags); + spin_lock_irqsave(&p->sfdl_flush_lock, flags); *common->sfdl_flush_addr = 0; /* force an interrupt. */ @@ -322,8 +317,7 @@ void sn_dma_flush(u64 addr) cpu_relax(); /* okay, everything is synched up. */ - spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, - flags); + spin_unlock_irqrestore(&p->sfdl_flush_lock, flags); } return; } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 2fac27049bf6..98f716bd92f0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -163,9 +163,12 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont /* Setup the PMU ATE map */ soft->pbi_int_ate_resource.lowest_free_index = 0; soft->pbi_int_ate_resource.ate = - kmalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL); - memset(soft->pbi_int_ate_resource.ate, 0, - (soft->pbi_int_ate_size * sizeof(u64))); + kzalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL); + + if (!soft->pbi_int_ate_resource.ate) { + kfree(soft); + return NULL; + } if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) { /* TIO PCI Bridge: find nearest node with CPUs */ diff --git a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h index 01e5b4103235..5335d87ca5f8 100644 --- a/include/asm-ia64/sn/bte.h +++ b/include/asm-ia64/sn/bte.h @@ -46,7 +46,7 @@ #define BTES_PER_NODE (is_shub2() ? 4 : 2) #define MAX_BTES_PER_NODE 4 -#define BTE2OFF_CTRL (0) +#define BTE2OFF_CTRL 0 #define BTE2OFF_SRC (SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0) #define BTE2OFF_DEST (SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0) #define BTE2OFF_NOTIFY (SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0) @@ -75,11 +75,11 @@ : base + (BTEOFF_NOTIFY/8)) /* Define hardware modes */ -#define BTE_NOTIFY (IBCT_NOTIFY) +#define BTE_NOTIFY IBCT_NOTIFY #define BTE_NORMAL BTE_NOTIFY #define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE) /* Use a reserved bit to let the caller specify a wait for any BTE */ -#define BTE_WACQUIRE (0x4000) +#define BTE_WACQUIRE 0x4000 /* Use the BTE on the node with the destination memory */ #define BTE_USE_DEST (BTE_WACQUIRE << 1) /* Use any available BTE interface on any node for the transfer */ diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index 9334078b089a..a601d3af39b6 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H @@ -115,18 +115,6 @@ struct pcibus_info { spinlock_t pbi_lock; }; -/* - * pcibus_info structure locking macros - */ -inline static unsigned long -pcibr_lock(struct pcibus_info *pcibus_info) -{ - unsigned long flag; - spin_lock_irqsave(&pcibus_info->pbi_lock, flag); - return(flag); -} -#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag) - extern int pcibr_init_provider(void); extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *); extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t); diff --git a/include/asm-ia64/sn/sn_feature_sets.h b/include/asm-ia64/sn/sn_feature_sets.h index 9ca642cad338..ff33e3bd3f8e 100644 --- a/include/asm-ia64/sn/sn_feature_sets.h +++ b/include/asm-ia64/sn/sn_feature_sets.h @@ -12,9 +12,6 @@ */ -#include -#include - /* --------------------- PROM Features -----------------------------*/ extern int sn_prom_feature_available(int id); -- cgit v1.2.3 From d3454344b3507042e5d561d0cfed19e99cf2fc88 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 13 Feb 2006 05:32:09 -0500 Subject: [IA64] remove obsolete corporate address Remove obsolete SGI address Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/timer_interrupt.c | 7 +------ drivers/ide/pci/sgiioc4.c | 5 ----- include/asm-ia64/machvec_sn2.h | 7 +------ 3 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index adf5db2e2afe..fa7f69945917 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -1,7 +1,7 @@ /* * * - * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2005, 2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -22,11 +22,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 2b286e865163..43b96e298363 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -13,11 +13,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index e1b6cd63f49e..03d00faf03b5 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -20,11 +20,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan -- cgit v1.2.3 From c2a4969ba14e852bf4ee92c7db3b0cf82405a0c9 Mon Sep 17 00:00:00 2001 From: Dean Roe Date: Tue, 14 Feb 2006 15:01:23 -0600 Subject: [IA64-SGI] fix the size of __sn_cnodeid_to_nasid The __sn_cnodeid_to_nasid array was incorrectly sized at MAX_NUMNODES. On a large system, this array could overflow. The following patch corrects this by defining it to MAX_COMPACT_NODES. Signed-off-by: Dean Roe Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 2 +- include/asm-ia64/sn/arch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 1672ecad77e2..5b84836c2171 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); -DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 1a3831c04af6..91c31be87b13 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); * Compact node ID to nasid mappings kept in the per-cpu data areas of each * cpu. */ -DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); #define sn_cnodeid_to_nasid (&__get_cpu_var(__sn_cnodeid_to_nasid[0])) -- cgit v1.2.3 From 4c2cd96696ae0896ce4bcf725b9f0eaffafeb640 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 15 Feb 2006 08:02:21 -0600 Subject: [IA64-SGI] enforce proper ordering of callouts by XPC Fix XPC so that it does not deliver any messages until the connected callout has returned, as well as, prevent the disconnected callout to occur before the disconnecting callout has returned. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc_channel.c | 8 +++++--- arch/ia64/sn/kernel/xpc_main.c | 20 +++++++++++++------- include/asm-ia64/sn/xpc.h | 31 ++++++++++++++++++------------- 3 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 36e5437a0fb6..cdf6856ce089 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -738,7 +738,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* make sure all activity has settled down first */ - if (atomic_read(&ch->references) > 0) { + if (atomic_read(&ch->references) > 0 || + ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) { return; } DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); @@ -775,7 +777,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* both sides are disconnected now */ - if (ch->flags & XPC_C_CONNECTCALLOUT) { + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { spin_unlock_irqrestore(&ch->lock, *irq_flags); xpc_disconnect_callout(ch, xpcDisconnected); spin_lock_irqsave(&ch->lock, *irq_flags); @@ -1300,7 +1302,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) "delivered=%d, partid=%d, channel=%d\n", nmsgs_sent, ch->partid, ch->number); - if (ch->flags & XPC_C_CONNECTCALLOUT) { + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { xpc_activate_kthreads(ch, nmsgs_sent); } } diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 9cd460dfe27e..8cbf16432570 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -750,12 +750,16 @@ xpc_daemonize_kthread(void *args) /* let registerer know that connection has been established */ spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTCALLOUT)) { - ch->flags |= XPC_C_CONNECTCALLOUT; + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_connected_callout(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + /* * It is possible that while the callout was being * made that the remote partition sent some messages. @@ -777,15 +781,17 @@ xpc_daemonize_kthread(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0) { spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTCALLOUT) && - !(ch->flags & XPC_C_DISCONNECTCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTCALLOUT; + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_disconnect_callout(ch, xpcDisconnecting); - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; } + spin_unlock_irqrestore(&ch->lock, irq_flags); if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); xpc_IPI_send_disengage(part); diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index 0c36928ffd8b..df7f5f4f3cde 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h @@ -508,19 +508,24 @@ struct xpc_channel { #define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ #define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ -#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ - -#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ - -#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ -#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ -#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ +#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000080 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00010000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00020000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ -- cgit v1.2.3 From defbb2c929cbe89dc92239b303cd33d3c85e9a83 Mon Sep 17 00:00:00 2001 From: "hawkes@sgi.com" Date: Tue, 14 Feb 2006 10:40:17 -0800 Subject: [IA64] ia64: simplify and fix udelay() The original ia64 udelay() was simple, but flawed for platforms without synchronized ITCs: a preemption and migration to another CPU during the while-loop likely resulted in too-early termination or very, very lengthy looping. The first fix (now in 2.6.15) broke the delay loop into smaller, non-preemptible chunks, reenabling preemption between the chunks. This fix is flawed in that the total udelay is computed to be the sum of just the non-premptible while-loop pieces, i.e., not counting the time spent in the interim preemptible periods. If an interrupt or a migration occurs during one of these interim periods, then that time is invisible and only serves to lengthen the effective udelay(). This new fix backs out the current flawed fix and returns to a simple udelay(), fully preemptible and interruptible. It implements two simple alternative udelay() routines: one a default generic version that uses ia64_get_itc(), and the other an sn-specific version that uses that platform's RTC. Signed-off-by: John Hawkes Signed-off-by: Tony Luck --- arch/ia64/kernel/time.c | 39 +++++++++++++++++---------------------- arch/ia64/sn/kernel/sn2/timer.c | 19 +++++++++++++++++++ include/asm-ia64/timex.h | 2 ++ 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index a094ec49ccfa..307d01e15b2e 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -250,32 +250,27 @@ time_init (void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); } -#define SMALLUSECS 100 - -void -udelay (unsigned long usecs) +/* + * Generic udelay assumes that if preemption is allowed and the thread + * migrates to another CPU, that the ITC values are synchronized across + * all CPUs. + */ +static void +ia64_itc_udelay (unsigned long usecs) { - unsigned long start; - unsigned long cycles; - unsigned long smallusecs; + unsigned long start = ia64_get_itc(); + unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; - /* - * Execute the non-preemptible delay loop (because the ITC might - * not be synchronized between CPUS) in relatively short time - * chunks, allowing preemption between the chunks. - */ - while (usecs > 0) { - smallusecs = (usecs > SMALLUSECS) ? SMALLUSECS : usecs; - preempt_disable(); - cycles = smallusecs*local_cpu_data->cyc_per_usec; - start = ia64_get_itc(); + while (time_before(ia64_get_itc(), end)) + cpu_relax(); +} - while (ia64_get_itc() - start < cycles) - cpu_relax(); +void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; - preempt_enable(); - usecs -= smallusecs; - } +void +udelay (unsigned long usecs) +{ + (*ia64_udelay)(usecs); } EXPORT_SYMBOL(udelay); diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index deb9baf4d473..56a88b6df4b4 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -28,9 +29,27 @@ static struct time_interpolator sn2_interpolator = { .source = TIME_SOURCE_MMIO64 }; +/* + * sn udelay uses the RTC instead of the ITC because the ITC is not + * synchronized across all CPUs, and the thread may migrate to another CPU + * if preemption is enabled. + */ +static void +ia64_sn_udelay (unsigned long usecs) +{ + unsigned long start = rtc_time(); + unsigned long end = start + + usecs * sn_rtc_cycles_per_second / 1000000; + + while (time_before((unsigned long)rtc_time(), end)) + cpu_relax(); +} + void __init sn_timer_init(void) { sn2_interpolator.frequency = sn_rtc_cycles_per_second; sn2_interpolator.addr = RTC_COUNTER_ADDR; register_time_interpolator(&sn2_interpolator); + + ia64_udelay = &ia64_sn_udelay; } diff --git a/include/asm-ia64/timex.h b/include/asm-ia64/timex.h index 414aae060440..05a6baf8a472 100644 --- a/include/asm-ia64/timex.h +++ b/include/asm-ia64/timex.h @@ -15,6 +15,8 @@ typedef unsigned long cycles_t; +extern void (*ia64_udelay)(unsigned long usecs); + /* * For performance reasons, we don't want to define CLOCK_TICK_TRATE as * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George -- cgit v1.2.3 From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:10:22 -0800 Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish When a packet matching an IPsec policy is SNATed so it doesn't match any policy anymore it looses its xfrm bundle, which makes xfrm4_output_finish crash because of a NULL pointer dereference. This patch directs these packets to the original output path instead. Since the packets have already passed the POST_ROUTING hook, but need to start at the beginning of the original output path which includes another POST_ROUTING invocation, a flag is added to the IPCB to indicate that the packet was rerouted and doesn't need to pass the POST_ROUTING hook again. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 19 +++++++++++++++---- include/net/ip.h | 1 + include/net/xfrm.h | 1 - net/ipv4/ip_gre.c | 3 ++- net/ipv4/ip_output.c | 16 ++++++++++------ net/ipv4/ipip.c | 3 ++- net/ipv4/xfrm4_output.c | 13 ++++++++++--- 7 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4cf6088625c1..3ca3d9ee78a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { + if (!cond) + return 1; #ifndef CONFIG_NETFILTER_DEBUG if (list_empty(&nf_hooks[pf][hook])) return 1; @@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\ + __ret = (okfn)(skb); \ +__ret;}) + +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ +({int __ret; \ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { return okfn(*pskb); } diff --git a/include/net/ip.h b/include/net/ip.h index 8de0697b364c..fab3d5b3ab1c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -41,6 +41,7 @@ struct inet_skb_parm #define IPSKB_XFRM_TUNNEL_SIZE 2 #define IPSKB_XFRM_TRANSFORMED 4 #define IPSKB_FRAG_COMPLETE 8 +#define IPSKB_REROUTED 16 }; struct ipcm_cookie diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d09ca0e7d139..d6111a2f0a23 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -866,7 +866,6 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); -extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index abe23923e4e7..9981dcd68f11 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -830,7 +830,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3324fbfe528a..57d290d89ec2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -207,8 +207,10 @@ static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb->dst->xfrm != NULL) - return xfrm4_output_finish(skb); + if (skb->dst->xfrm != NULL) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } #endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) @@ -271,8 +273,9 @@ int ip_mc_output(struct sk_buff *skb) newskb->dev, ip_dev_loopback_xmit); } - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_output(struct sk_buff *skb) @@ -284,8 +287,9 @@ int ip_output(struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_queue_xmit(struct sk_buff *skb, int ipfragok) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e5cbe72c6b80..03d13742a4b8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -622,7 +622,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d4df0ddd424b..32ad229b4fed 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -152,10 +152,16 @@ error_nolock: goto out_exit; } -int xfrm4_output_finish(struct sk_buff *skb) +static int xfrm4_output_finish(struct sk_buff *skb) { int err; +#ifdef CONFIG_NETFILTER + if (!skb->dst->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); @@ -178,6 +184,7 @@ int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, - xfrm4_output_finish); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } -- cgit v1.2.3 From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:18:19 -0800 Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of nf_hook() nf_hook() is supposed to call the netfilter hook and return control of the packet back to the caller in case it may pass, the okfn is only used for queueing. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3ca3d9ee78a9..468896939843 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return okfn(*pskb); + return 1; } static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} struct flowi; -- cgit v1.2.3 From 9f672004ab1a8094bec1785b39ac683ab9eebebc Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Wed, 15 Feb 2006 15:17:34 -0800 Subject: [PATCH] neofb: avoid resetting display config on unblank (v2) There were two mistakes in the register-read-on-(un)blank approach. - First, without proper register (un)locking the value read back will always be zero, and this is what I missed entirely until just now. Due to this, the logic could not be verified at all and I tried some bogus checks which are completely stupid. - Second, the LCD status bit will always be set to zero when the backlight has been turned off. Reading the value back during unblank will disable the LCD unconditionally, regardless of the state it is supposed to be in, since we set it to zero beforehand. So this is what we do now: - create a new variable in struct neofb_par, and use that to determine whether to read back registers (initialized to true) - before actually blanking the screen, read back the register to sense any possible change made through Fn key combo - use proper neoUnlock() / neoLock() to actually read something - every call to neofb_blank() determines if we read back next time: blanking disables readback, unblanking (FB_BLANK_UNBLANK) enables it This should give us a nice and clean state machine. Has been thoroughly tested on a Dell Latitude CPiA / NM220 Chip docked to a C/Dock2 with attached CRT in all possible combinations of LCD/CRT on/off. I changed the config via Fn key, let the console blank, unblanked by keypress - works flawlessly. Signed-off-by: Christian Trefzer Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/neofb.c | 15 ++++++++++++--- include/video/neomagic.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c index b85e2b180a44..a2e201dc40f7 100644 --- a/drivers/video/neofb.c +++ b/drivers/video/neofb.c @@ -843,6 +843,9 @@ static int neofb_set_par(struct fb_info *info) par->SysIfaceCntl2 = 0xc0; /* VESA Bios sets this to 0x80! */ + /* Initialize: by default, we want display config register to be read */ + par->PanelDispCntlRegRead = 1; + /* Enable any user specified display devices. */ par->PanelDispCntlReg1 = 0x00; if (par->internal_display) @@ -1334,11 +1337,17 @@ static int neofb_blank(int blank_mode, struct fb_info *info) struct neofb_par *par = info->par; int seqflags, lcdflags, dpmsflags, reg; + /* - * Reload the value stored in the register, might have been changed via - * FN keystroke + * Reload the value stored in the register, if sensible. It might have + * been changed via FN keystroke. */ - par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03; + if (par->PanelDispCntlRegRead) { + neoUnlock(); + par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03; + neoLock(&par->state); + } + par->PanelDispCntlRegRead = !blank_mode; switch (blank_mode) { case FB_BLANK_POWERDOWN: /* powerdown - both sync lines down */ diff --git a/include/video/neomagic.h b/include/video/neomagic.h index 1d69049bd4c1..78b1f15a538f 100644 --- a/include/video/neomagic.h +++ b/include/video/neomagic.h @@ -159,6 +159,7 @@ struct neofb_par { unsigned char PanelDispCntlReg1; unsigned char PanelDispCntlReg2; unsigned char PanelDispCntlReg3; + unsigned char PanelDispCntlRegRead; unsigned char PanelVertCenterReg1; unsigned char PanelVertCenterReg2; unsigned char PanelVertCenterReg3; -- cgit v1.2.3 From 5f6164f3092832e0d9b12eed52e09a76bf39c64a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Feb 2006 15:17:39 -0800 Subject: [PATCH] add asm-generic/mman.h Make new MADV_REMOVE, MADV_DONTFORK, MADV_DOFORK consistent across all arches. The idea is to make it possible to use them portably even before distros include them in libc headers. Move common flags to asm-generic/mman.h Signed-off-by: Michael S. Tsirkin Cc: Roland Dreier Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mman.h | 8 +++++--- include/asm-arm/mman.h | 31 +------------------------------ include/asm-arm26/mman.h | 31 +------------------------------ include/asm-cris/mman.h | 31 +------------------------------ include/asm-frv/mman.h | 31 +------------------------------ include/asm-generic/mman.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/asm-h8300/mman.h | 31 +------------------------------ include/asm-i386/mman.h | 31 +------------------------------ include/asm-ia64/mman.h | 31 +------------------------------ include/asm-m32r/mman.h | 33 ++------------------------------- include/asm-m68k/mman.h | 31 +------------------------------ include/asm-mips/mman.h | 22 ++++++++++++---------- include/asm-parisc/mman.h | 8 +++++--- include/asm-powerpc/mman.h | 32 ++------------------------------ include/asm-s390/mman.h | 31 +------------------------------ include/asm-sh/mman.h | 31 +------------------------------ include/asm-sparc/mman.h | 31 ++----------------------------- include/asm-sparc64/mman.h | 31 ++----------------------------- include/asm-v850/mman.h | 30 +----------------------------- include/asm-x86_64/mman.h | 30 +----------------------------- include/asm-xtensa/mman.h | 22 ++++++++++++---------- 21 files changed, 96 insertions(+), 503 deletions(-) create mode 100644 include/asm-generic/mman.h (limited to 'include') diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h index a21515c16a43..5f24c755f577 100644 --- a/include/asm-alpha/mman.h +++ b/include/asm-alpha/mman.h @@ -42,9 +42,11 @@ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ -#define MADV_REMOVE 7 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h index 693ed859e632..54570d2e95b7 100644 --- a/include/asm-arm/mman.h +++ b/include/asm-arm/mman.h @@ -1,19 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ARM_MMAN_H__ */ diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h index 2096c50df888..4000a6c1b76b 100644 --- a/include/asm-arm26/mman.h +++ b/include/asm-arm26/mman.h @@ -1,19 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ARM_MMAN_H__ */ diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h index deddfb239ff5..1c35e1b66b46 100644 --- a/include/asm-cris/mman.h +++ b/include/asm-cris/mman.h @@ -3,19 +3,7 @@ /* verbatim copy of asm-i386/ version */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -25,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __CRIS_MMAN_H__ */ diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h index d3bca306da82..b4371e928683 100644 --- a/include/asm-frv/mman.h +++ b/include/asm-frv/mman.h @@ -1,19 +1,7 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,25 +11,8 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ASM_MMAN_H__ */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h new file mode 100644 index 000000000000..3b41d2bb70da --- /dev/null +++ b/include/asm-generic/mman.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_MMAN_H +#define _ASM_GENERIC_MMAN_H + +/* + Author: Michael S. Tsirkin , Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +/* compatibility flags */ +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 + +#endif diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h index ac0346f7d11d..b9f104f22a36 100644 --- a/include/asm-h8300/mman.h +++ b/include/asm-h8300/mman.h @@ -1,19 +1,7 @@ #ifndef __H8300_MMAN_H__ #define __H8300_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __H8300_MMAN_H__ */ diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h index ab2339a1d807..8fd9d7ab7faf 100644 --- a/include/asm-i386/mman.h +++ b/include/asm-i386/mman.h @@ -1,19 +1,7 @@ #ifndef __I386_MMAN_H__ #define __I386_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __I386_MMAN_H__ */ diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h index 357ebb780cc0..6ba179f12718 100644 --- a/include/asm-ia64/mman.h +++ b/include/asm-ia64/mman.h @@ -8,19 +8,7 @@ * David Mosberger-Tang , Hewlett-Packard Co */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x00100 /* stack-like segment */ #define MAP_GROWSUP 0x00200 /* register stack-like segment */ @@ -31,24 +19,7 @@ #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* _ASM_IA64_MMAN_H */ diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h index 6b02fe3fcff2..695a860c024f 100644 --- a/include/asm-m32r/mman.h +++ b/include/asm-m32r/mman.h @@ -1,21 +1,9 @@ #ifndef __M32R_MMAN_H__ #define __M32R_MMAN_H__ -/* orig : i386 2.6.0-test6 */ - -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +#include -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +/* orig : i386 2.6.0-test6 */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -25,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __M32R_MMAN_H__ */ diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h index efd12bc4ccb7..1626d37f4898 100644 --- a/include/asm-m68k/mman.h +++ b/include/asm-m68k/mman.h @@ -1,19 +1,7 @@ #ifndef __M68K_MMAN_H__ #define __M68K_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __M68K_MMAN_H__ */ diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h index 6d01e26830fa..046cf686bee7 100644 --- a/include/asm-mips/mman.h +++ b/include/asm-mips/mman.h @@ -60,17 +60,19 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 #endif /* _ASM_MMAN_H */ diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h index a381cf5c8f55..0ef15ee0f17e 100644 --- a/include/asm-parisc/mman.h +++ b/include/asm-parisc/mman.h @@ -38,7 +38,11 @@ #define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ #define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ #define MADV_VPS_INHERIT 7 /* Inherit parents page size */ -#define MADV_REMOVE 8 /* remove these pages & resources */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* The range 12-64 is reserved for page size specification. */ #define MADV_4K_PAGES 12 /* Use 4K pages */ @@ -49,8 +53,6 @@ #define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ #define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ #define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h index fcff25d13f13..24cf664a8295 100644 --- a/include/asm-powerpc/mman.h +++ b/include/asm-powerpc/mman.h @@ -1,6 +1,8 @@ #ifndef _ASM_POWERPC_MMAN_H #define _ASM_POWERPC_MMAN_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -8,19 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_LOCKED 0x80 @@ -29,27 +18,10 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h index d41ca1477010..7839767d837e 100644 --- a/include/asm-s390/mman.h +++ b/include/asm-s390/mman.h @@ -9,19 +9,7 @@ #ifndef __S390_MMAN_H__ #define __S390_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -31,24 +19,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __S390_MMAN_H__ */ diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h index 0e08d0573abc..156eb0225cf6 100644 --- a/include/asm-sh/mman.h +++ b/include/asm-sh/mman.h @@ -1,19 +1,7 @@ #ifndef __ASM_SH_MMAN_H #define __ASM_SH_MMAN_H -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ASM_SH_MMAN_H */ diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h index 4a298b2be859..88d1886abf3b 100644 --- a/include/asm-sparc/mman.h +++ b/include/asm-sparc/mman.h @@ -2,21 +2,10 @@ #ifndef __SPARC_MMAN_H__ #define __SPARC_MMAN_H__ -/* SunOS'ified... */ +#include -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +/* SunOS'ified... */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_INHERIT 0x80 /* SunOS doesn't do this, but... */ @@ -27,10 +16,6 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ @@ -48,18 +33,6 @@ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ -#define MADV_REMOVE 0x6 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 #endif /* __SPARC_MMAN_H__ */ diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h index d705ec92da8b..6fd878e61435 100644 --- a/include/asm-sparc64/mman.h +++ b/include/asm-sparc64/mman.h @@ -2,21 +2,10 @@ #ifndef __SPARC64_MMAN_H__ #define __SPARC64_MMAN_H__ -/* SunOS'ified... */ +#include -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +/* SunOS'ified... */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_INHERIT 0x80 /* SunOS doesn't do this, but... */ @@ -27,10 +16,6 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ @@ -48,18 +33,6 @@ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ -#define MADV_REMOVE 0x6 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 #endif /* __SPARC64_MMAN_H__ */ diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h index 7b851c310e41..edbf6edbfb37 100644 --- a/include/asm-v850/mman.h +++ b/include/asm-v850/mman.h @@ -1,18 +1,7 @@ #ifndef __V850_MMAN_H__ #define __V850_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -20,24 +9,7 @@ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __V850_MMAN_H__ */ diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h index b699a38c1c3c..dd5cb0534d37 100644 --- a/include/asm-x86_64/mman.h +++ b/include/asm-x86_64/mman.h @@ -1,19 +1,8 @@ #ifndef __X8664_MMAN_H__ #define __X8664_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_SEM 0x8 -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +#include -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ @@ -24,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h index e2d7afb679c8..ba394cbb4807 100644 --- a/include/asm-xtensa/mman.h +++ b/include/asm-xtensa/mman.h @@ -67,17 +67,19 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 #endif /* _XTENSA_MMAN_H */ -- cgit v1.2.3 From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 15 Feb 2006 15:17:40 -0800 Subject: [PATCH] hrtimer: fix multiple macro argument expansion For two macros the arguments were expanded twice, change them to inline functions to avoid it. Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 6aca67a569a2..f3dec45ef874 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) /* convert a timespec to ktime_t format: */ -#define timespec_to_ktime(ts) ktime_set((ts).tv_sec, (ts).tv_nsec) +static inline ktime_t timespec_to_ktime(struct timespec ts) +{ + return ktime_set(ts.tv_sec, ts.tv_nsec); +} /* convert a timeval to ktime_t format: */ -#define timeval_to_ktime(tv) ktime_set((tv).tv_sec, (tv).tv_usec * 1000) +static inline ktime_t timeval_to_ktime(struct timeval tv) +{ + return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); +} /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) -- cgit v1.2.3 From 7bbb79403163e047c6e333ff169db34e3c969e65 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 16 Feb 2006 11:08:09 +0000 Subject: [ARM] Fix SMP initialisation oops A change to the SMP initialisation caused the following oops: CPU1: Booted secondary processor CPU1: D VIPT write-back cache CPU1: I cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets CPU1: D cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets <7>Calibrating delay loop... 83.14 BogoMIPS (lpj=415744) <1>Unable to handle kernel NULL pointer dereference at virtual address 0000001c ... PC is at enqueue_task+0x1c/0x64 LR is at activate_task+0xcc/0xe4 SMP initialisation now requires cpu_possible_map to be initialised in setup_arch(). Move this from smp_prepare_cpus() to smp_init_cpus() and call it from our setup_arch() if CONFIG_SMP is enabled. Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 1 - arch/arm/mach-integrator/platsmp.c | 21 +++++++++++++++------ arch/arm/mach-realview/platsmp.c | 21 +++++++++++++++------ include/asm-arm/smp.h | 5 +++++ 5 files changed, 40 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c45d10d07bde..68273b4dc882 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -771,6 +772,10 @@ void __init setup_arch(char **cmdline_p) paging_init(&meminfo, mdesc); request_standard_resources(&meminfo, mdesc); +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + cpu_init(); /* diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7338948bd7d3..02aa300c4633 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -338,7 +338,6 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; - cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); cpu_set(cpu, cpu_online_map); } diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c index ea10bd8c972c..1bc8534ef0c6 100644 --- a/arch/arm/mach-integrator/platsmp.c +++ b/arch/arm/mach-integrator/platsmp.c @@ -140,6 +140,18 @@ static void __init poke_milo(void) mb(); } +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + for (i = 0; i < ncores; i++) + cpu_set(i, cpu_possible_map); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = get_core_count(); @@ -176,14 +188,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) max_cpus = ncores; /* - * Initialise the possible/present maps. - * cpu_possible_map describes the set of CPUs which may be present - * cpu_present_map describes the set of CPUs populated + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. */ - for (i = 0; i < max_cpus; i++) { - cpu_set(i, cpu_possible_map); + for (i = 0; i < max_cpus; i++) cpu_set(i, cpu_present_map); - } /* * Do we need any more CPUs? If so, then let them know where diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index a8fbd76d8be5..b8484e15dacb 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -143,6 +143,18 @@ static void __init poke_milo(void) mb(); } +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + for (i = 0; i < ncores; i++) + cpu_set(i, cpu_possible_map); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = get_core_count(); @@ -179,14 +191,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_timer_setup(cpu); /* - * Initialise the possible/present maps. - * cpu_possible_map describes the set of CPUs which may be present - * cpu_present_map describes the set of CPUs populated + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. */ - for (i = 0; i < max_cpus; i++) { - cpu_set(i, cpu_possible_map); + for (i = 0; i < max_cpus; i++) cpu_set(i, cpu_present_map); - } /* * Do we need any more CPUs? If so, then let them know where diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h index 5a72e50ca9fc..fe45f7f61223 100644 --- a/include/asm-arm/smp.h +++ b/include/asm-arm/smp.h @@ -41,6 +41,11 @@ extern void show_ipi_list(struct seq_file *p); */ asmlinkage void do_IPI(struct pt_regs *regs); +/* + * Setup the SMP cpu_possible_map + */ +extern void smp_init_cpus(void); + /* * Move global data into per-processor storage. */ -- cgit v1.2.3 From d9db950cfa3d674ee834d980c329efdf8e4a0568 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 16 Feb 2006 22:36:15 +0000 Subject: [ARM] 3339/1: ARM EABI: make unmuxed syscalls visible Patch from Nicolas Pitre With EABI the multiplex sys_ipc and sys_socketcall syscalls are unavailable and their support code even removed from the compiled kernel, and the new unmuxed syscalls must be used instead. Make those syscall numbers visible. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- include/asm-arm/unistd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 77430d6178ae..8f331bbd39a8 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -309,7 +309,7 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) -#if 0 /* reserve these for un-muxing socketcall */ +#if defined(__ARM_EABI__) /* reserve these for un-muxing socketcall */ #define __NR_socket (__NR_SYSCALL_BASE+281) #define __NR_bind (__NR_SYSCALL_BASE+282) #define __NR_connect (__NR_SYSCALL_BASE+283) @@ -329,7 +329,7 @@ #define __NR_recvmsg (__NR_SYSCALL_BASE+297) #endif -#if 0 /* reserve these for un-muxing ipc */ +#if defined(__ARM_EABI__) /* reserve these for un-muxing ipc */ #define __NR_semop (__NR_SYSCALL_BASE+298) #define __NR_semget (__NR_SYSCALL_BASE+299) #define __NR_semctl (__NR_SYSCALL_BASE+300) @@ -347,7 +347,7 @@ #define __NR_request_key (__NR_SYSCALL_BASE+310) #define __NR_keyctl (__NR_SYSCALL_BASE+311) -#if 0 /* reserved for un-muxing ipc */ +#if defined(__ARM_EABI__) /* reserved for un-muxing ipc */ #define __NR_semtimedop (__NR_SYSCALL_BASE+312) #endif -- cgit v1.2.3 From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:41:58 +0100 Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and cleanup AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution installation it's easiest if it's a boot time option. Also I moved the variable to a more appropiate place and make it independent from sysctl And marked __read_mostly which it is. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ arch/i386/kernel/cpu/transmeta.c | 1 + include/linux/kernel.h | 6 ------ include/linux/mm.h | 2 ++ kernel/sysctl.c | 2 -- mm/memory.c | 10 ++++++++++ 6 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ac75b57edf2e..b874771385cd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1638,6 +1638,9 @@ running once the system is up. Format: ,,,,,[,[,[,]]] + norandmaps Don't use address space randomization + Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + ______________________________________________________________________ Changelog: diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index bdbeb77f4e22..7214c9b577ab 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b49affa0ac5a..3b507bf05d09 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -326,12 +326,6 @@ struct sysinfo { /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#ifdef CONFIG_SYSCTL -extern int randomize_va_space; -#else -#define randomize_va_space 1 -#endif - /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/mm.h b/include/linux/mm.h index 75e9f0724997..26e1663a5cbe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask, void drop_pagecache(void); void drop_slab(void); +extern int randomize_va_space; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 71dd6f62efec..7654d55c47f5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,8 +126,6 @@ extern int sysctl_hz_timer; extern int acct_parm[]; #endif -int randomize_va_space = 1; - static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, diff --git a/mm/memory.c b/mm/memory.c index 2bee1f21aa8a..9abc6008544b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmalloc_earlyreserve); +int randomize_va_space __read_mostly = 1; + +static int __init disable_randmaps(char *s) +{ + randomize_va_space = 0; + return 0; +} +__setup("norandmaps", disable_randmaps); + + /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but -- cgit v1.2.3 From 7fd67843b96f90f59c9a244a1bc25137978a3ff9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:07 +0100 Subject: [PATCH] x86_64: Disable tsc when apicpmtimer is active Otherwise it has no effect anyways. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 1 + arch/x86_64/kernel/time.c | 3 +-- include/asm-x86_64/proto.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7a0a3e8d5d72..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -1152,6 +1152,7 @@ __setup("noapicmaintimer", setup_noapicmaintimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; + notsc_setup(NULL); return setup_apicmaintimer(NULL); } __setup("apicpmtimer", setup_apicpmtimer); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3c58c30506a1..67841d11ed1f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1327,8 +1327,7 @@ static int __init nohpet_setup(char *s) __setup("nohpet", nohpet_setup); - -static int __init notsc_setup(char *s) +int __init notsc_setup(char *s) { notsc = 1; return 0; diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index c99832e7bf3f..eca3f2d633db 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -133,6 +133,7 @@ extern int fix_aperture; extern int force_iommu; extern int reboot_force; +extern int notsc_setup(char *); extern void smp_local_timer_interrupt(struct pt_regs * regs); -- cgit v1.2.3 From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 17 Feb 2006 10:30:23 +1100 Subject: [PATCH] Provide an interface for getting the current tick length This provides an interface for arch code to find out how many nanoseconds are going to be added on to xtime by the next call to do_timer. The value returned is a fixed-point number in 52.12 format in nanoseconds. The reason for this format is that it gives the full precision that the timekeeping code is using internally. The motivation for this is to fix a problem that has arisen on 32-bit powerpc in that the value returned by do_gettimeofday drifts apart from xtime if NTP is being used. PowerPC is now using a lockless do_gettimeofday based on reading the timebase register and performing some simple arithmetic. (This method of getting the time is also exported to userspace via the VDSO.) However, the factor and offset it uses were calculated based on the nominal tick length and weren't being adjusted when NTP varied the tick length. Note that 64-bit powerpc has had the lockless do_gettimeofday for a long time now. It also had an extremely hairy routine that got called from the 32-bit compat routine for adjtimex, which adjusted the factor and offset according to what it thought the timekeeping code was going to do. Not only was this only called if a 32-bit task did adjtimex (i.e. not if a 64-bit task did adjtimex), it was also duplicating computations from kernel/timer.c and it wasn't clear that it was (still) correct. The simple solution is to ask the timekeeping code how long the current jiffy will be on each timer interrupt, after calling do_timer. If this jiffy will be a different length from the last one, we then need to compute new values for the factor and offset used in the lockless do_gettimeofday. In this way we can keep xtime and do_gettimeofday in sync, even when NTP is varying the tick length. Note that when adjtimex varies the tick length, it almost always introduces the variation from the next tick on. The only case I could see where adjtimex would vary the length of the current tick is when an old-style adjtime adjustment is being cancelled. (It's not clear to me why the adjustment has to be cancelled immediately rather than from the next tick on.) Thus I don't see any real need for a hook in adjtimex; the rare case of an old-style adjustment being cancelled can be fixed up at the next tick. Signed-off-by: Paul Mackerras Acked-by: john stultz Signed-off-by: Linus Torvalds --- include/linux/timex.h | 3 +++ kernel/timer.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 04a4a8cb4ed3..b7ca1204e42a 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -345,6 +345,9 @@ time_interpolator_reset(void) #endif /* !CONFIG_TIME_INTERPOLATION */ +/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ +extern u64 current_tick_length(void); + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ diff --git a/kernel/timer.c b/kernel/timer.c index b9dad3994676..fe3a9a9f8328 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -717,12 +717,16 @@ static void second_overflow(void) #endif } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) +/* + * Returns how many microseconds we need to add to xtime this tick + * in doing an adjustment requested with adjtime. + */ +static long adjtime_adjustment(void) { - long time_adjust_step, delta_nsec; + long time_adjust_step; - if ((time_adjust_step = time_adjust) != 0 ) { + time_adjust_step = time_adjust; + if (time_adjust_step) { /* * We are doing an adjtime thing. Prepare time_adjust_step to * be within bounds. Note that a positive time_adjust means we @@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void) */ time_adjust_step = min(time_adjust_step, (long)tickadj); time_adjust_step = max(time_adjust_step, (long)-tickadj); + } + return time_adjust_step; +} +/* in the NTP reference this is called "hardclock()" */ +static void update_wall_time_one_tick(void) +{ + long time_adjust_step, delta_nsec; + + time_adjust_step = adjtime_adjustment(); + if (time_adjust_step) /* Reduce by this step the amount of time left */ time_adjust -= time_adjust_step; - } delta_nsec = tick_nsec + time_adjust_step * 1000; /* * Advance the phase, once it gets to one microsecond, then @@ -758,6 +771,22 @@ static void update_wall_time_one_tick(void) } } +/* + * Return how long ticks are at the moment, that is, how much time + * update_wall_time_one_tick will add to xtime next time we call it + * (assuming no calls to do_adjtimex in the meantime). + * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 + * bits to the right of the binary point. + * This function has no side-effects. + */ +u64 current_tick_length(void) +{ + long delta_nsec; + + delta_nsec = tick_nsec + adjtime_adjustment() * 1000; + return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; +} + /* * Using a loop looks inefficient, but "ticks" is * usually just one (we shouldn't be losing ticks, -- cgit v1.2.3 From cfe91f9ce297e23e6fbdf61c02bdd8ab9af7c8a8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Fri, 17 Feb 2006 03:16:55 -0500 Subject: [PATCH] i386: fix singlestepping though a syscall Do not mask TIF_SINGLESTEP bit in _TIF_WORK_MASK. Masking this stopped do_notify_resume() from being called when it should have been. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Linus Torvalds --- include/asm-i386/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index e20e99551d71..1f7d48c9ba3f 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -158,8 +158,8 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|\ - _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_EMU)) /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) -- cgit v1.2.3 From 255acee706b333b79f593dd366f16e1f107cccc3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Feb 2006 13:52:46 -0800 Subject: [PATCH] s390: additional_cpus parameter Introduce additional_cpus command line option. By default no additional cpu can be attached to the system anymore. Only the cpus present at IPL time can be switched on/off. If it is desired that additional cpus can be attached to the system the maximum number of additional cpus needs to be specified with this option. This change is necessary in order to limit the waste of per_cpu data structures. Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpu-hotplug.txt | 10 ++++---- arch/s390/kernel/setup.c | 2 ++ arch/s390/kernel/smp.c | 58 +++++++++++++++++++++++++++++-------------- include/asm-s390/smp.h | 2 ++ 4 files changed, 49 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index e05278087ffa..4d3355da0e26 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -11,6 +11,8 @@ Joel Schopp ia64/x86_64: Ashok Raj + s390: + Heiko Carstens Authors: Ashok Raj Lots of feedback: Nathan Lynch , @@ -44,11 +46,9 @@ maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using maxcpus=2 will only boot 2. You can choose to bring the other cpus later online, read FAQ's for more info. -additional_cpus*=n Use this to limit hotpluggable cpus. This option sets - cpu_possible_map = cpu_present_map + additional_cpus - -(*) Option valid only for following architectures -- x86_64, ia64 +additional_cpus=n [x86_64, s390 only] use this to limit hotpluggable cpus. + This option sets + cpu_possible_map = cpu_present_map + additional_cpus ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT to determine the number of potentially hot-pluggable cpus. The implementation diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index de8784267473..24f62f16c0e5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -600,6 +600,7 @@ setup_arch(char **cmdline_p) init_mm.brk = (unsigned long) &_end; parse_cmdline_early(cmdline_p); + parse_early_param(); setup_memory(); setup_resources(); @@ -607,6 +608,7 @@ setup_arch(char **cmdline_p) cpu_init(); __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; + smp_setup_cpu_possible_map(); /* * Create kernel page tables and switch to virtual addressing. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0d1ad5dbe2b1..53291e94ac7b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,8 +1,7 @@ /* * arch/s390/kernel/smp.c * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) IBM Corp. 1999,2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * Heiko Carstens (heiko.carstens@de.ibm.com) @@ -41,8 +40,6 @@ #include #include -/* prototypes */ - extern volatile int __cpu_logical_map[]; /* @@ -51,13 +48,11 @@ extern volatile int __cpu_logical_map[]; struct _lowcore *lowcore_ptr[NR_CPUS]; -cpumask_t cpu_online_map; -cpumask_t cpu_possible_map = CPU_MASK_ALL; +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; static struct task_struct *current_set[NR_CPUS]; -EXPORT_SYMBOL(cpu_online_map); - /* * Reboot, halt and power_off routines for SMP. */ @@ -490,10 +485,10 @@ void smp_ctl_clear_bit(int cr, int bit) { * Lets check how many CPUs we have. */ -void -__init smp_check_cpus(unsigned int max_cpus) +static unsigned int +__init smp_count_cpus(void) { - int cpu, num_cpus; + unsigned int cpu, num_cpus; __u16 boot_cpu_addr; /* @@ -503,19 +498,20 @@ __init smp_check_cpus(unsigned int max_cpus) boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr; current_thread_info()->cpu = 0; num_cpus = 1; - for (cpu = 0; cpu <= 65535 && num_cpus < max_cpus; cpu++) { + for (cpu = 0; cpu <= 65535; cpu++) { if ((__u16) cpu == boot_cpu_addr) continue; - __cpu_logical_map[num_cpus] = (__u16) cpu; - if (signal_processor(num_cpus, sigp_sense) == + __cpu_logical_map[1] = (__u16) cpu; + if (signal_processor(1, sigp_sense) == sigp_not_operational) continue; - cpu_set(num_cpus, cpu_present_map); num_cpus++; } printk("Detected %d CPU's\n",(int) num_cpus); printk("Boot cpu address %2X\n", boot_cpu_addr); + + return num_cpus; } /* @@ -676,6 +672,32 @@ __cpu_up(unsigned int cpu) return 0; } +static unsigned int __initdata additional_cpus; + +void __init smp_setup_cpu_possible_map(void) +{ + unsigned int pcpus, cpu; + + pcpus = smp_count_cpus() + additional_cpus; + + if (pcpus > NR_CPUS) + pcpus = NR_CPUS; + + for (cpu = 0; cpu < pcpus; cpu++) + cpu_set(cpu, cpu_possible_map); + + cpu_present_map = cpu_possible_map; +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int __init setup_additional_cpus(char *s) +{ + additional_cpus = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("additional_cpus", setup_additional_cpus); + int __cpu_disable(void) { @@ -744,6 +766,8 @@ cpu_die(void) for(;;); } +#endif /* CONFIG_HOTPLUG_CPU */ + /* * Cycle through the processors and setup structures. */ @@ -757,7 +781,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); - smp_check_cpus(max_cpus); memset(lowcore_ptr,0,sizeof(lowcore_ptr)); /* * Initialize prefix pages and stacks for all possible cpus @@ -806,14 +829,12 @@ void __devinit smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); cpu_set(0, cpu_online_map); - cpu_set(0, cpu_present_map); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; } void smp_cpus_done(unsigned int max_cpus) { - cpu_present_map = cpu_possible_map; } /* @@ -845,6 +866,7 @@ static int __init topology_init(void) subsys_initcall(topology_init); +EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); EXPORT_SYMBOL(lowcore_ptr); EXPORT_SYMBOL(smp_ctl_set_bit); diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index 9c6e9c300eb9..444dae5912e6 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h @@ -31,6 +31,7 @@ typedef struct __u16 cpu; } sigp_info; +extern void smp_setup_cpu_possible_map(void); extern int smp_call_function_on(void (*func) (void *info), void *info, int nonatomic, int wait, int cpu); #define NO_PROC_ID 0xFF /* No processor magic marker */ @@ -104,6 +105,7 @@ smp_call_function_on(void (*func) (void *info), void *info, #define smp_cpu_not_running(cpu) 1 #define smp_get_cpu(cpu) ({ 0; }) #define smp_put_cpu(cpu) ({ 0; }) +#define smp_setup_cpu_possible_map() #endif #endif -- cgit v1.2.3 From 200a4552af34b9a32e1f68a881a9ed5c7ec699cc Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 17 Feb 2006 13:52:56 -0800 Subject: [PATCH] powerpc: Fix accidentally-working typo in __pud_free_tlb One of the parameters to the __pud_free_tlb() macro for powerpc is incorrect (see patch) . We get away with it by accident, because the one place the macro is called, the second parameter is a variable named "pud". Signed-off-by: David Gibson Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-powerpc/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h index 9f5b052784a5..a00ee002cd11 100644 --- a/include/asm-powerpc/pgalloc.h +++ b/include/asm-powerpc/pgalloc.h @@ -146,7 +146,7 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) #ifndef CONFIG_PPC_64K_PAGES -#define __pud_free_tlb(tlb, pmd) \ +#define __pud_free_tlb(tlb, pud) \ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) #endif /* CONFIG_PPC_64K_PAGES */ -- cgit v1.2.3