From fbb18a277a6f192404aa20ece49529acb1e1e76d Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 26 Mar 2006 23:13:39 +0100 Subject: [SERIAL] amba-pl010: allow platforms to specify modem control method The amba-pl010 hardware does not provide RTS and DTR control lines; it is expected that these will be implemented using GPIO. Allow platforms to supply a function to implement manipulation of modem control lines. Signed-off-by: Russell King --- include/linux/amba/serial.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index dc726ffccebd..48ee32a18ac5 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -158,4 +158,10 @@ #define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) +#ifndef __ASSEMBLY__ +struct amba_pl010_data { + void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); +}; +#endif + #endif -- cgit v1.2.3 From 837c7878771c15ed8d85ecf814ece7fcb4551b46 Mon Sep 17 00:00:00 2001 From: Ben Woodard Date: Wed, 22 Mar 2006 08:09:31 +0100 Subject: [BLOCK] increase size of disk stat counters The kernel's representation of the disk statistics uses the type unsigned, which is 32b on both 32b and 64b platforms. Unfortunately, most system tools that work with the numbers exported in /proc/diskstats, including iostat, read them into unsigned longs. This works fine on 32b platforms, and on 64b platforms as long as the number of IO transactions is small. However, when the counters wrap on a 64b platform, reading them into unsigned longs and comparing them to previous readings yields an unsigned representation of a negative number. This looks like a very large 64b number and gives you bizarre readouts in iostat: ilc4: Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util ilc4: sda 5.50 0.00 143.96 0.00 307496983987862656.00 0.00 153748491993931328.00 0.00 2136028725038430.00 7.94 55.12 5.59 80.42 Fixing iostat in user space is possible, but a quick survey indicates that several other similar tools also use unsigned longs when processing /proc/diskstats. Therefore, it seems better to extend the disk_stats counters to 64 bits (unsigned long) on 64b architectures. The following patch does that. It should not affect the operation on 32b platforms.
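For illustration only (not part of either patch above): the wraparound described here can be reproduced with a few lines of userspace C that mimic how iostat keeps the previous sample and computes a delta. All names below are invented.

    #include <stdio.h>

    int main(void)
    {
        /* The kernel counter is a 32-bit "unsigned"; iostat keeps the value
         * it read last interval in an unsigned long (64-bit on 64b). */
        unsigned kernel_prev = 4294967000u;          /* just below UINT_MAX */
        unsigned kernel_curr = kernel_prev + 1000u;  /* wraps to a small value */

        unsigned long prev = kernel_prev;            /* previous sample */
        unsigned long curr = kernel_curr;            /* current sample */

        /* Prints a huge 64-bit number instead of 1000 - the "bizarre
         * readouts" quoted above. */
        printf("delta = %lu\n", curr - prev);
        return 0;
    }

Once the in-kernel counters are unsigned long, they effectively never wrap on 64b platforms, so the userspace subtraction stays sane.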
Signed-off-by: Ben Woodard Cc: Rick Lindsley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/genhd.c | 6 +++--- include/linux/genhd.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 64510fd88621..db4c60c802d6 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -454,8 +454,8 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page) disk_round_stats(disk); preempt_enable(); return sprintf(page, - "%8u %8u %8llu %8u " - "%8u %8u %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " "%8u %8u %8u" "\n", disk_stat_read(disk, ios[READ]), @@ -649,7 +649,7 @@ static int diskstats_show(struct seq_file *s, void *v) preempt_disable(); disk_round_stats(gp); preempt_enable(); - seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n", + seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", gp->major, n + gp->first_minor, disk_name(gp, n, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd647fde5ec1..179fea53fc81 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -89,12 +89,12 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 struct disk_stats { - unsigned sectors[2]; /* READs and WRITEs */ - unsigned ios[2]; - unsigned merges[2]; - unsigned ticks[2]; - unsigned io_ticks; - unsigned time_in_queue; + unsigned long sectors[2]; /* READs and WRITEs */ + unsigned long ios[2]; + unsigned long merges[2]; + unsigned long ticks[2]; + unsigned long io_ticks; + unsigned long time_in_queue; }; struct gendisk { -- cgit v1.2.3 From f75ba3ade8a4599d67040a9493d75a864e7b329c Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:52 -0800 Subject: [PATCH] autofs4: increase module version Update autofs4 version. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 9343c89d843c..d998ddcf7288 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 4 -#define AUTOFS_PROTO_SUBVERSION 7 +#define AUTOFS_PROTO_SUBVERSION 10 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 -- cgit v1.2.3 From 5c0a32fc2cd0be912511199449a37a4a6f0f582d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:55 -0800 Subject: [PATCH] autofs4: add new packet type for v5 communications This patch define a new autofs packet for autofs v5 and updates the waitq.c functions to handle the additional packet type. 
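As a rough sketch only (not part of this patch), a v5-aware automount daemon could consume the new packet like this; struct autofs_v5_packet and the autofs_ptype_* constants come from the header change below, while the pipe descriptor and the printf reporting are invented for illustration.

    #include <stdio.h>
    #include <unistd.h>
    #include <linux/auto_fs4.h>

    /* Read one kernel request from the autofs pipe and dispatch on its type. */
    static int handle_one_packet(int pipefd)
    {
        struct autofs_v5_packet pkt;

        if (read(pipefd, &pkt, sizeof(pkt)) != (ssize_t)sizeof(pkt))
            return -1;

        switch (pkt.hdr.type) {
        case autofs_ptype_missing_indirect:
        case autofs_ptype_missing_direct:
            /* mount request: path component plus the new credential fields */
            printf("mount %.*s (dev=%u ino=%llu uid=%u pid=%u)\n",
                   (int)pkt.len, pkt.name, pkt.dev,
                   (unsigned long long)pkt.ino, pkt.uid, pkt.pid);
            break;
        case autofs_ptype_expire_indirect:
        case autofs_ptype_expire_direct:
            printf("expire %.*s\n", (int)pkt.len, pkt.name);
            break;
        default:
            return -1;    /* v4 packet type, or a protocol mismatch */
        }
        return 0;
    }

The kernel side below writes sizeof(struct autofs_v5_packet) bytes per request, which is why a single fixed-size read is enough.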
Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 23 +++++++++---- fs/autofs4/waitq.c | 86 +++++++++++++++++++++++++++++++++++++++++------- include/linux/auto_fs4.h | 51 +++++++++++++++++++++++++--- 3 files changed, 136 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index ed388a1d8fc4..37c8d909d1e9 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -77,6 +77,12 @@ struct autofs_wait_queue { int hash; int len; char *name; + u32 dev; + u64 ino; + uid_t uid; + gid_t gid; + pid_t pid; + pid_t tgid; /* This is for status reporting upon return */ int status; atomic_t notified; @@ -180,13 +186,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info /* Queue management functions */ -enum autofs_notify -{ - NFY_NONE, - NFY_MOUNT, - NFY_EXPIRE -}; - int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); @@ -204,6 +203,16 @@ static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **de return res; } +static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) +{ + return new_encode_dev(sbi->sb->s_dev); +} + +static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) +{ + return sbi->sb->s_root->d_inode->i_ino; +} + static inline int simple_positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index b0bb9d43bcd9..12da2c977b0a 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -3,7 +3,7 @@ * linux/fs/autofs/waitq.c * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -97,7 +97,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; - if (type == autofs_ptype_missing) { + switch (type) { + /* Kernel protocol v4 missing and expire packets */ + case autofs_ptype_missing: + { struct autofs_packet_missing *mp = &pkt.missing; pktsz = sizeof(*mp); @@ -106,7 +109,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mp->len = wq->len; memcpy(mp->name, wq->name, wq->len); mp->name[wq->len] = '\0'; - } else if (type == autofs_ptype_expire_multi) { + break; + } + case autofs_ptype_expire_multi: + { struct autofs_packet_expire_multi *ep = &pkt.expire_multi; pktsz = sizeof(*ep); @@ -115,7 +121,34 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, ep->len = wq->len; memcpy(ep->name, wq->name, wq->len); ep->name[wq->len] = '\0'; - } else { + break; + } + /* + * Kernel protocol v5 packet for handling indirect and direct + * mount missing and expire requests + */ + case autofs_ptype_missing_indirect: + case autofs_ptype_expire_indirect: + case autofs_ptype_missing_direct: + case autofs_ptype_expire_direct: + { + struct autofs_v5_packet *packet = &pkt.v5_packet; + + pktsz = sizeof(*packet); + + packet->wait_queue_token = wq->wait_queue_token; + packet->len = wq->len; + memcpy(packet->name, wq->name, wq->len); + packet->name[wq->len] = '\0'; + packet->dev = wq->dev; + packet->ino = wq->ino; + packet->uid = wq->uid; + packet->gid = wq->gid; + 
packet->pid = wq->pid; + packet->tgid = wq->tgid; + break; + } + default: printk("autofs4_notify_daemon: bad type %d!\n", type); return; } @@ -161,7 +194,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, { struct autofs_wait_queue *wq; char *name; - int len, status; + unsigned int len = 0; + unsigned int hash = 0; + int status; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) @@ -171,11 +206,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, if (!name) return -ENOMEM; - len = autofs4_getpath(sbi, dentry, &name); - if (!len) { - kfree(name); - return -ENOENT; + /* If this is a direct mount request create a dummy name */ + if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT)) + len = sprintf(name, "%p", dentry); + else { + len = autofs4_getpath(sbi, dentry, &name); + if (!len) { + kfree(name); + return -ENOENT; + } } + hash = full_name_hash(name, len); if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); @@ -211,9 +252,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->next = sbi->queues; sbi->queues = wq; init_waitqueue_head(&wq->queue); - wq->hash = dentry->d_name.hash; + wq->hash = hash; wq->name = name; wq->len = len; + wq->dev = autofs4_get_dev(sbi); + wq->ino = autofs4_get_ino(sbi); + wq->uid = current->uid; + wq->gid = current->gid; + wq->pid = current->pid; + wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); atomic_set(&wq->notified, 1); @@ -227,8 +274,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, } if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) { - int type = (notify == NFY_MOUNT ? - autofs_ptype_missing : autofs_ptype_expire_multi); + int type; + + if (sbi->version < 5) { + if (notify == NFY_MOUNT) + type = autofs_ptype_missing; + else + type = autofs_ptype_expire_multi; + } else { + if (notify == NFY_MOUNT) + type = (sbi->type & AUTOFS_TYP_DIRECT) ? + autofs_ptype_missing_direct : + autofs_ptype_missing_indirect; + else + type = (sbi->type & AUTOFS_TYP_DIRECT) ? + autofs_ptype_expire_direct : + autofs_ptype_expire_indirect; + } DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index d998ddcf7288..0a6bc52ffe88 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -19,18 +19,37 @@ #undef AUTOFS_MIN_PROTO_VERSION #undef AUTOFS_MAX_PROTO_VERSION -#define AUTOFS_PROTO_VERSION 4 +#define AUTOFS_PROTO_VERSION 5 #define AUTOFS_MIN_PROTO_VERSION 3 -#define AUTOFS_MAX_PROTO_VERSION 4 +#define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 10 +#define AUTOFS_PROTO_SUBVERSION 0 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -/* New message type */ -#define autofs_ptype_expire_multi 2 /* Expire entry (umount request) */ +/* Daemon notification packet types */ +enum autofs_notify { + NFY_NONE, + NFY_MOUNT, + NFY_EXPIRE +}; + +/* Kernel protocol version 4 packet types */ + +/* Expire entry (umount request) */ +#define autofs_ptype_expire_multi 2 + +/* Kernel protocol version 5 packet types */ + +/* Indirect mount missing and expire requests. 
*/ +#define autofs_ptype_missing_indirect 3 +#define autofs_ptype_expire_indirect 4 + +/* Direct mount missing and expire requests */ +#define autofs_ptype_missing_direct 5 +#define autofs_ptype_expire_direct 6 /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { @@ -40,14 +59,36 @@ struct autofs_packet_expire_multi { char name[NAME_MAX+1]; }; +/* autofs v5 common packet struct */ +struct autofs_v5_packet { + struct autofs_packet_hdr hdr; + autofs_wqt_t wait_queue_token; + __u32 dev; + __u64 ino; + __u32 uid; + __u32 gid; + __u32 pid; + __u32 tgid; + __u32 len; + char name[NAME_MAX+1]; +}; + +typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; +typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; +typedef struct autofs_v5_packet autofs_packet_missing_direct_t; +typedef struct autofs_v5_packet autofs_packet_expire_direct_t; + union autofs_packet_union { struct autofs_packet_hdr hdr; struct autofs_packet_missing missing; struct autofs_packet_expire expire; struct autofs_packet_expire_multi expire_multi; + struct autofs_v5_packet v5_packet; }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI +#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) #define AUTOFS_IOC_ASKREGHOST _IOR(0x93,0x68,int) #define AUTOFS_IOC_TOGGLEREGHOST _IOR(0x93,0x69,int) -- cgit v1.2.3 From efc36aa5608f5717338747e152c23f2cfdb14697 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:14:59 -0800 Subject: [PATCH] knfsd: Change the store of auth_domains to not be a 'cache' The 'auth_domain's are simply handles on internal data structures. They do not cache information from user-space, and forcing them into the mold of a 'cache' misrepresents their true nature and causes confusion. 
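To make the new contract concrete, here is a small userspace analogue (an illustration with invented names, not kernel code) of what auth_domain_lookup() becomes after this patch: a plain hash table keyed by name, guarded by a single lock, holding reference-counted entries.

    #include <pthread.h>
    #include <string.h>

    struct domain {
        struct domain *next;    /* hash chain */
        int ref;                /* stands in for struct kref */
        char *name;
    };

    #define HASHSZ 64
    static struct domain *table[HASHSZ];
    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    static unsigned hash_name(const char *s)
    {
        unsigned h = 0;
        while (*s)
            h = h * 31 + (unsigned char)*s++;
        return h % HASHSZ;
    }

    /* Return an existing entry (taking a reference), or link in 'new'. */
    struct domain *domain_lookup(const char *name, struct domain *new)
    {
        unsigned h = hash_name(name);
        struct domain *d;

        pthread_mutex_lock(&table_lock);
        for (d = table[h]; d; d = d->next) {
            if (strcmp(d->name, name) == 0) {
                d->ref++;
                pthread_mutex_unlock(&table_lock);
                return d;
            }
        }
        if (new) {
            new->next = table[h];
            table[h] = new;
            new->ref++;        /* the table's reference */
        }
        pthread_mutex_unlock(&table_lock);
        return new;
    }

The matching put would mirror auth_domain_put() in the patch: drop the count under the same lock and, when it reaches zero, unhash and release the entry.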
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 5 +- include/linux/sunrpc/svcauth.h | 12 ++-- net/sunrpc/auth_gss/svcauth_gss.c | 14 ++--- net/sunrpc/sunrpc_syms.c | 4 +- net/sunrpc/svcauth.c | 122 +++++++++++--------------------------- net/sunrpc/svcauth_unix.c | 69 ++++++++++----------- 6 files changed, 81 insertions(+), 145 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 417ec02df44f..ac0997731fce 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -242,7 +242,7 @@ static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b) static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item) { - cache_get(&item->ek_client->h); + kref_get(&item->ek_client->ref); new->ek_client = item->ek_client; new->ek_fsidtype = item->ek_fsidtype; new->ek_fsid[0] = item->ek_fsid[0]; @@ -474,7 +474,7 @@ static inline int svc_export_match(struct svc_export *a, struct svc_export *b) } static inline void svc_export_init(struct svc_export *new, struct svc_export *item) { - cache_get(&item->ex_client->h); + kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; new->ex_dentry = dget(item->ex_dentry); new->ex_mnt = mntget(item->ex_mnt); @@ -1129,7 +1129,6 @@ exp_delclient(struct nfsctl_client *ncp) */ if (dom) { err = auth_unix_forget_old(dom); - dom->h.expiry_time = get_seconds(); auth_domain_put(dom); } diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index c119ce7cbd22..2fe2087edd66 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -45,9 +45,10 @@ struct svc_rqst; /* forward decl */ * of ip addresses to the given client. */ struct auth_domain { - struct cache_head h; + struct kref ref; + struct hlist_node hash; char *name; - int flavour; + struct auth_ops *flavour; }; /* @@ -86,6 +87,9 @@ struct auth_domain { * * domain_release() * This call releases a domain. + * set_client() + * Givens a pending request (struct svc_rqst), finds and assigns + * an appropriate 'auth_domain' as the client. 
*/ struct auth_ops { char * name; @@ -117,7 +121,7 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); -extern struct auth_domain *auth_domain_lookup(struct auth_domain *item, int set); +extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); extern struct auth_domain *auth_unix_lookup(struct in_addr addr); extern int auth_unix_forget_old(struct auth_domain *dom); @@ -160,8 +164,6 @@ static inline unsigned long hash_mem(char *buf, int length, int bits) return hash >> (BITS_PER_LONG - bits); } -extern struct cache_detail auth_domain_cache, ip_map_cache; - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 23632d84d8d7..6b073c2e6930 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -645,6 +645,8 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) return auth_domain_find(name); } +static struct auth_ops svcauthops_gss; + int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { @@ -655,20 +657,18 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) goto out; - cache_init(&new->h.h); + kref_init(&new->h.ref); new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL); if (!new->h.name) goto out_free_dom; strcpy(new->h.name, name); - new->h.flavour = RPC_AUTH_GSS; + new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; - new->h.h.expiry_time = NEVER; - test = auth_domain_lookup(&new->h, 1); - if (test == &new->h) { - BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); - } else { /* XXX Duplicate registration? */ + test = auth_domain_lookup(name, &new->h); + if (test != &new->h) { /* XXX Duplicate registration? */ auth_domain_put(&new->h); + /* dangling ref-count... */ goto out; } return 0; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 9f7373203592..40401196e7de 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -142,6 +142,7 @@ EXPORT_SYMBOL(nlm_debug); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); +extern struct cache_detail ip_map_cache; static int __init init_sunrpc(void) @@ -158,7 +159,6 @@ init_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif - cache_register(&auth_domain_cache); cache_register(&ip_map_cache); out: return err; @@ -169,8 +169,6 @@ cleanup_sunrpc(void) { unregister_rpc_pipefs(); rpc_destroy_mempool(); - if (cache_unregister(&auth_domain_cache)) - printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n"); if (cache_unregister(&ip_map_cache)) printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); #ifdef RPC_DEBUG diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index dda4f0c63511..5b28c6176806 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -106,112 +106,56 @@ svc_auth_unregister(rpc_authflavor_t flavor) EXPORT_SYMBOL(svc_auth_unregister); /************************************************** - * cache for domain name to auth_domain - * Entries are only added by flavours which will normally - * have a structure that 'inherits' from auth_domain. - * e.g. 
when an IP -> domainname is given to auth_unix, - * and the domain name doesn't exist, it will create a - * auth_unix_domain and add it to this hash table. - * If it finds the name does exist, but isn't AUTH_UNIX, - * it will complain. + * 'auth_domains' are stored in a hash table indexed by name. + * When the last reference to an 'auth_domain' is dropped, + * the object is unhashed and freed. + * If auth_domain_lookup fails to find an entry, it will return + * it's second argument 'new'. If this is non-null, it will + * have been atomically linked into the table. */ -/* - * Auth auth_domain cache is somewhat different to other caches, - * largely because the entries are possibly of different types: - * each auth flavour has it's own type. - * One consequence of this that DefineCacheLookup cannot - * allocate a new structure as it cannot know the size. - * Notice that the "INIT" code fragment is quite different - * from other caches. When auth_domain_lookup might be - * creating a new domain, the new domain is passed in - * complete and it is used as-is rather than being copied into - * another structure. - */ #define DN_HASHBITS 6 #define DN_HASHMAX (1<flavour]->domain_release(dom); -} - - -struct cache_detail auth_domain_cache = { - .owner = THIS_MODULE, - .hash_size = DN_HASHMAX, - .hash_table = auth_domain_table, - .name = "auth.domain", - .cache_put = auth_domain_drop, -}; +static struct hlist_head auth_domain_table[DN_HASHMAX]; +static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED; void auth_domain_put(struct auth_domain *dom) { - auth_domain_drop(&dom->h, &auth_domain_cache); -} - -static inline int auth_domain_hash(struct auth_domain *item) -{ - return hash_str(item->name, DN_HASHBITS); -} -static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item) -{ - return strcmp(tmp->name, item->name) == 0; + if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) { + hlist_del(&dom->hash); + dom->flavour->domain_release(dom); + } } struct auth_domain * -auth_domain_lookup(struct auth_domain *item, int set) +auth_domain_lookup(char *name, struct auth_domain *new) { - struct auth_domain *tmp = NULL; - struct cache_head **hp, **head; - head = &auth_domain_cache.hash_table[auth_domain_hash(item)]; - - if (set) - write_lock(&auth_domain_cache.hash_lock); - else - read_lock(&auth_domain_cache.hash_lock); - for (hp=head; *hp != NULL; hp = &tmp->h.next) { - tmp = container_of(*hp, struct auth_domain, h); - if (!auth_domain_match(tmp, item)) - continue; - if (!set) { - cache_get(&tmp->h); - goto out_noset; + struct auth_domain *hp; + struct hlist_head *head; + struct hlist_node *np; + + head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; + + spin_lock(&auth_domain_lock); + + hlist_for_each_entry(hp, np, head, hash) { + if (strcmp(hp->name, name)==0) { + kref_get(&hp->ref); + spin_unlock(&auth_domain_lock); + return hp; } - *hp = tmp->h.next; - tmp->h.next = NULL; - auth_domain_drop(&tmp->h, &auth_domain_cache); - goto out_set; } - /* Didn't find anything */ - if (!set) - goto out_nada; - auth_domain_cache.entries++; -out_set: - item->h.next = *head; - *head = &item->h; - cache_get(&item->h); - write_unlock(&auth_domain_cache.hash_lock); - cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time); - cache_get(&item->h); - return item; -out_nada: - tmp = NULL; -out_noset: - read_unlock(&auth_domain_cache.hash_lock); - return tmp; + if (new) { + hlist_add_head(&new->hash, head); + kref_get(&new->ref); + } + spin_unlock(&auth_domain_lock); + return new; } 
struct auth_domain *auth_domain_find(char *name) { - struct auth_domain *rv, ad; - - ad.name = name; - rv = auth_domain_lookup(&ad, 0); - return rv; + return auth_domain_lookup(name, NULL); } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3e6c694bbad1..17e8b2a3130c 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -27,41 +27,35 @@ struct unix_domain { /* other stuff later */ }; +extern struct auth_ops svcauth_unix; + struct auth_domain *unix_domain_find(char *name) { - struct auth_domain *rv, ud; - struct unix_domain *new; - - ud.name = name; - - rv = auth_domain_lookup(&ud, 0); - - foundit: - if (rv && rv->flavour != RPC_AUTH_UNIX) { - auth_domain_put(rv); - return NULL; - } - if (rv) - return rv; - - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (new == NULL) - return NULL; - cache_init(&new->h.h); - new->h.name = kstrdup(name, GFP_KERNEL); - new->h.flavour = RPC_AUTH_UNIX; - new->addr_changes = 0; - new->h.h.expiry_time = NEVER; - - rv = auth_domain_lookup(&new->h, 2); - if (rv == &new->h) { - if (atomic_dec_and_test(&new->h.h.refcnt)) BUG(); - } else { - auth_domain_put(&new->h); - goto foundit; + struct auth_domain *rv; + struct unix_domain *new = NULL; + + rv = auth_domain_lookup(name, NULL); + while(1) { + if (rv != &new->h) { + if (new) auth_domain_put(&new->h); + return rv; + } + if (rv && rv->flavour != &svcauth_unix) { + auth_domain_put(rv); + return NULL; + } + if (rv) + return rv; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) + return NULL; + kref_init(&new->h.ref); + new->h.name = kstrdup(name, GFP_KERNEL); + new->h.flavour = &svcauth_unix; + new->addr_changes = 0; + rv = auth_domain_lookup(name, &new->h); } - - return rv; } static void svcauth_unix_domain_release(struct auth_domain *dom) @@ -130,7 +124,7 @@ static inline void ip_map_init(struct ip_map *new, struct ip_map *item) } static inline void ip_map_update(struct ip_map *new, struct ip_map *item) { - cache_get(&item->m_client->h.h); + kref_get(&item->m_client->h.ref); new->m_client = item->m_client; new->m_add_change = item->m_add_change; } @@ -272,7 +266,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) struct unix_domain *udom; struct ip_map ip, *ipmp; - if (dom->flavour != RPC_AUTH_UNIX) + if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); strcpy(ip.m_class, "nfsd"); @@ -295,7 +289,7 @@ int auth_unix_forget_old(struct auth_domain *dom) { struct unix_domain *udom; - if (dom->flavour != RPC_AUTH_UNIX) + if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); udom->addr_changes++; @@ -323,7 +317,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) rv = NULL; } else { rv = &ipm->m_client->h; - cache_get(&rv->h); + kref_get(&rv->ref); } ip_map_put(&ipm->h, &ip_map_cache); return rv; @@ -332,7 +326,6 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) void svcauth_unix_purge(void) { cache_purge(&ip_map_cache); - cache_purge(&auth_domain_cache); } static int @@ -361,7 +354,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) return SVC_DENIED; case 0: rqstp->rq_client = &ipm->m_client->h; - cache_get(&rqstp->rq_client->h); + kref_get(&rqstp->rq_client->ref); ip_map_put(&ipm->h, &ip_map_cache); break; } -- cgit v1.2.3 From eab7e2e647c348b418e8715ecaca0177e1b473c7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:00 -0800 Subject: [PATCH] knfsd: Break the hard linkage from svc_expkey to svc_export Current 
svc_expkey holds a pointer to the svc_export structure, so updates to that structure have to be in-place, which is a wart on the whole cache infrastruct. So we break that linkage and just do a second lookup. If this became a performance issue, it would be possible to put a direct link back in which was only used conditionally. i.e. when an object is replaced in the cache, we set a flag in the old object. When dereferencing the link from svc_expkey, if the flag is set, we drop the reference and do a fresh lookup. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 60 ++++++++++++++++++++++++++++++--------------- include/linux/nfsd/export.h | 20 +++------------ 2 files changed, 44 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ac0997731fce..587829ed651c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -73,8 +73,10 @@ void expkey_put(struct cache_head *item, struct cache_detail *cd) if (cache_put(item, cd)) { struct svc_expkey *key = container_of(item, struct svc_expkey, h); if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - exp_put(key->ek_export); + !test_bit(CACHE_NEGATIVE, &item->flags)) { + dput(key->ek_dentry); + mntput(key->ek_mnt); + } auth_domain_put(key->ek_client); kfree(key); } @@ -164,26 +166,18 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) } else { struct nameidata nd; struct svc_expkey *ek; - struct svc_export *exp; err = path_lookup(buf, 0, &nd); if (err) goto out; dprintk("Found the path %s\n", buf); - exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); - - err = -ENOENT; - if (!exp) - goto out_nd; - key.ek_export = exp; - dprintk("And found export\n"); + key.ek_mnt = nd.mnt; + key.ek_dentry = nd.dentry; ek = svc_expkey_lookup(&key, 1); if (ek) expkey_put(&ek->h, &svc_expkey_cache); - exp_put(exp); err = 0; - out_nd: path_release(&nd); } cache_flush(); @@ -214,7 +208,7 @@ static int expkey_show(struct seq_file *m, if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { seq_printf(m, " "); - seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n"); + seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n"); } seq_printf(m, "\n"); return 0; @@ -252,8 +246,8 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item) { - cache_get(&item->ek_export->h); - new->ek_export = item->ek_export; + new->ek_mnt = mntget(item->ek_mnt); + new->ek_dentry = dget(item->ek_dentry); } static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ @@ -519,7 +513,8 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, key.ek_client = clp; key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_export = exp; + key.ek_mnt = exp->ex_mnt; + key.ek_dentry = exp->ex_dentry; key.h.expiry_time = NEVER; key.h.flags = 0; @@ -741,8 +736,8 @@ exp_export(struct nfsctl_export *nxp) if ((nxp->ex_flags & NFSEXP_FSID) && (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) && !IS_ERR(fsid_key) && - fsid_key->ek_export && - fsid_key->ek_export != exp) + fsid_key->ek_mnt && + (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) goto finish; if (exp) { @@ -912,6 +907,24 @@ out: return err; } +struct svc_export * +exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, + struct cache_req *reqp) +{ + struct svc_export 
*exp; + struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); + if (!ek || IS_ERR(ek)) + return ERR_PTR(PTR_ERR(ek)); + + exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); + expkey_put(&ek->h, &svc_expkey_cache); + + if (!exp || IS_ERR(exp)) + return ERR_PTR(PTR_ERR(exp)); + return exp; +} + + /* * Called when we need the filehandle for the root of the pseudofs, * for a given NFSv4 client. The root is defined to be the @@ -922,6 +935,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, struct cache_req *creq) { struct svc_expkey *fsid_key; + struct svc_export *exp; int rv; u32 fsidv[2]; @@ -933,8 +947,14 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, if (!fsid_key || IS_ERR(fsid_key)) return nfserr_perm; - rv = fh_compose(fhp, fsid_key->ek_export, - fsid_key->ek_export->ex_dentry, NULL); + exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq); + if (exp == NULL) + rv = nfserr_perm; + else if (IS_ERR(exp)) + rv = nfserrno(PTR_ERR(exp)); + else + rv = fh_compose(fhp, exp, + fsid_key->ek_dentry, NULL); expkey_put(&fsid_key->h, &svc_expkey_cache); return rv; } diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 6bad4766d3d9..d52e0b7ad37b 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -67,7 +67,8 @@ struct svc_expkey { int ek_fsidtype; u32 ek_fsid[3]; - struct svc_export * ek_export; + struct vfsmount * ek_mnt; + struct dentry * ek_dentry; }; #define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT)) @@ -114,22 +115,9 @@ static inline void exp_get(struct svc_export *exp) { cache_get(&exp->h); } -static inline struct svc_export * +extern struct svc_export * exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, - struct cache_req *reqp) -{ - struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); - if (ek && !IS_ERR(ek)) { - struct svc_export *exp = ek->ek_export; - int err; - exp_get(exp); - expkey_put(&ek->h, &svc_expkey_cache); - if ((err = cache_check(&svc_export_cache, &exp->h, reqp))) - exp = ERR_PTR(err); - return exp; - } else - return ERR_PTR(PTR_ERR(ek)); -} + struct cache_req *reqp); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 7d317f2c9f1e9dcf4f632fa98f91d1d4a36c4cae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:01 -0800 Subject: [PATCH] knfsd: Get rid of 'inplace' sunrpc caches These were an unnecessary wart. Also only have one 'DefineSimpleCache..' instead of two. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 4 ++-- fs/nfsd/nfs4idmap.c | 10 ++-------- include/linux/sunrpc/cache.h | 28 +++++++++++----------------- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svcauth_unix.c | 2 +- 5 files changed, 18 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 587829ed651c..c591761a1ad6 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -250,7 +250,7 @@ static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey * new->ek_dentry = dget(item->ek_dentry); } -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ +static DefineSimpleCacheLookup(svc_expkey, svc_expkey) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -482,7 +482,7 @@ static inline void svc_export_update(struct svc_export *new, struct svc_export * new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */ +static DefineSimpleCacheLookup(svc_export, svc_export) struct svc_expkey * diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 13369650cdf9..dea690aa8bb5 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -76,12 +76,6 @@ struct ent { char authname[IDMAP_NAMESZ]; }; -#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ - DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ - & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) - /* Common entry handling */ #define ENT_HASHBITS 8 @@ -264,7 +258,7 @@ out: return error; } -static DefineSimpleCacheLookupMap(ent, idtoname); +static DefineSimpleCacheLookup(ent, idtoname); /* * Name -> ID cache @@ -390,7 +384,7 @@ out: return (error); } -static DefineSimpleCacheLookupMap(ent, nametoid); +static DefineSimpleCacheLookup(ent, nametoid); /* * Exported API diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c4e3ea7cf154..405ac14e509a 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -133,14 +133,11 @@ struct cache_deferred_req { * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 and INPLACE == 0 : + * If "set" == 1 : * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 1, and INPLACE == 1 : - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place - * instead of swapping in a new entry. * * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -159,13 +156,8 @@ struct cache_deferred_req { * TEST tests if "tmp" matches "item" * INIT copies key information from "item" to "new" * UPDATE copies content information from "item" to "tmp" - * INPLACE is true if updates can happen inplace rather than allocating a new structure - * - * WARNING: any substantial changes to this must be reflected in - * net/sunrpc/svcauth.c(auth_domain_lookup) - * which is a similar routine that is open-coded. 
*/ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \ +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ RTN *FNAME ARGS \ { \ RTN *tmp, *new=NULL; \ @@ -179,13 +171,13 @@ RTN *FNAME ARGS \ tmp = container_of(*hp, RTN, MEMBER); \ if (TEST) { /* found a match */ \ \ - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ if (new) \ {INIT;} \ if (set) { \ - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ { /* need to swap in new */ \ RTN *t2; \ \ @@ -206,7 +198,7 @@ RTN *FNAME ARGS \ else read_unlock(&(DETAIL)->hash_lock); \ if (set) \ cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ return tmp; \ } \ @@ -239,10 +231,12 @@ RTN *FNAME ARGS \ return NULL; \ } -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \ - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \ - & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\ - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE) +#define DefineSimpleCacheLookup(STRUCT, FUNC) \ + DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ + (struct STRUCT *item, int set), /*no setup */, \ + & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 6b073c2e6930..aadb4e8d6aa7 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -259,7 +259,7 @@ static struct cache_detail rsi_cache = { .cache_parse = rsi_parse, }; -static DefineSimpleCacheLookup(rsi, 0) +static DefineSimpleCacheLookup(rsi, rsi) /* * The rpcsec_context cache is used to store a context that is @@ -446,7 +446,7 @@ static struct cache_detail rsc_cache = { .cache_parse = rsc_parse, }; -static DefineSimpleCacheLookup(rsc, 0); +static DefineSimpleCacheLookup(rsc, rsc); static struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 17e8b2a3130c..7ddf068b5b25 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -258,7 +258,7 @@ struct cache_detail ip_map_cache = { .cache_show = ip_map_show, }; -static DefineSimpleCacheLookup(ip_map, 0) +static DefineSimpleCacheLookup(ip_map, ip_map) int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) -- cgit v1.2.3 From 15a5f6bd23eddd5b3be80366f364be04fb1c1c99 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:02 -0800 Subject: [PATCH] knfsd: Create cache_lookup function instead of using a macro to declare one The C++-like 'template' approach proves to be too ugly and hard to work with. The old 'template' won't go away until all users are updated. 
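As a sketch of how a converted cache is expected to use the new interface (based on the ops added to struct cache_detail below; the 'example' names are invented and kernel headers are assumed): the owner supplies alloc/match/init/update callbacks and wraps the generic lookup in a small typed helper.

    #include <linux/slab.h>
    #include <linux/sunrpc/cache.h>

    #define EXAMPLE_HASHMAX 256    /* must match the cache_detail's hash_size */

    struct example_ent {
        struct cache_head h;
        int key;
        int value;
    };

    static struct cache_head *example_alloc(void)
    {
        struct example_ent *e = kmalloc(sizeof(*e), GFP_KERNEL);
        return e ? &e->h : NULL;
    }

    static int example_match(struct cache_head *a, struct cache_head *b)
    {
        return container_of(a, struct example_ent, h)->key ==
               container_of(b, struct example_ent, h)->key;
    }

    /* copy key material from 'key' into the freshly allocated entry */
    static void example_init(struct cache_head *new, struct cache_head *key)
    {
        container_of(new, struct example_ent, h)->key =
            container_of(key, struct example_ent, h)->key;
    }

    /* copy content from 'new' into the entry being refreshed */
    static void example_update(struct cache_head *old, struct cache_head *new)
    {
        container_of(old, struct example_ent, h)->value =
            container_of(new, struct example_ent, h)->value;
    }

    /* with .alloc/.match/.init/.update wired into the cache_detail: */
    static struct example_ent *example_lookup(struct cache_detail *cd, int key)
    {
        struct example_ent tmp = { .key = key };
        struct cache_head *ch;

        ch = sunrpc_cache_lookup(cd, &tmp.h, key % EXAMPLE_HASHMAX);
        return ch ? container_of(ch, struct example_ent, h) : NULL;
    }

An update path would build a second example_ent carrying the new content and pass both entries to sunrpc_cache_update(), which either fills in a not-yet-valid entry in place or atomically swaps a replacement into the hash chain.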
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 12 ++++++ net/sunrpc/cache.c | 98 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 405ac14e509a..3e17a5ff1dea 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -81,6 +81,11 @@ struct cache_detail { struct cache_detail *cd, struct cache_head *h); + struct cache_head * (*alloc)(void); + int (*match)(struct cache_head *orig, struct cache_head *new); + void (*init)(struct cache_head *orig, struct cache_head *new); + void (*update)(struct cache_head *orig, struct cache_head *new); + /* fields below this comment are for internal use * and should not be touched by cache owners */ @@ -237,6 +242,13 @@ RTN *FNAME ARGS \ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ STRUCT##_init(new, item), STRUCT##_update(tmp, item)) +extern struct cache_head * +sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash); +extern struct cache_head * +sunrpc_cache_update(struct cache_detail *detail, + struct cache_head *new, struct cache_head *old, int hash); + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 0acccfeeb284..4449dc52edf5 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -47,6 +47,104 @@ void cache_init(struct cache_head *h) h->last_refresh = now; } +struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash) +{ + struct cache_head **head, **hp; + struct cache_head *new = NULL; + + head = &detail->hash_table[hash]; + + read_lock(&detail->hash_lock); + + for (hp=head; *hp != NULL ; hp = &(*hp)->next) { + struct cache_head *tmp = *hp; + if (detail->match(tmp, key)) { + cache_get(tmp); + read_unlock(&detail->hash_lock); + return tmp; + } + } + read_unlock(&detail->hash_lock); + /* Didn't find anything, insert an empty entry */ + + new = detail->alloc(); + if (!new) + return NULL; + cache_init(new); + + write_lock(&detail->hash_lock); + + /* check if entry appeared while we slept */ + for (hp=head; *hp != NULL ; hp = &(*hp)->next) { + struct cache_head *tmp = *hp; + if (detail->match(tmp, key)) { + cache_get(tmp); + write_unlock(&detail->hash_lock); + detail->cache_put(new, detail); + return tmp; + } + } + detail->init(new, key); + new->next = *head; + *head = new; + detail->entries++; + cache_get(new); + write_unlock(&detail->hash_lock); + + return new; +} +EXPORT_SYMBOL(sunrpc_cache_lookup); + +struct cache_head *sunrpc_cache_update(struct cache_detail *detail, + struct cache_head *new, struct cache_head *old, int hash) +{ + /* The 'old' entry is to be replaced by 'new'. 
+ * If 'old' is not VALID, we update it directly, + * otherwise we need to replace it + */ + struct cache_head **head; + struct cache_head *tmp; + + if (!test_bit(CACHE_VALID, &old->flags)) { + write_lock(&detail->hash_lock); + if (!test_bit(CACHE_VALID, &old->flags)) { + if (test_bit(CACHE_NEGATIVE, &new->flags)) + set_bit(CACHE_NEGATIVE, &old->flags); + else + detail->update(old, new); + /* FIXME cache_fresh should come first */ + write_unlock(&detail->hash_lock); + cache_fresh(detail, old, new->expiry_time); + return old; + } + write_unlock(&detail->hash_lock); + } + /* We need to insert a new entry */ + tmp = detail->alloc(); + if (!tmp) { + detail->cache_put(old, detail); + return NULL; + } + cache_init(tmp); + detail->init(tmp, old); + head = &detail->hash_table[hash]; + + write_lock(&detail->hash_lock); + if (test_bit(CACHE_NEGATIVE, &new->flags)) + set_bit(CACHE_NEGATIVE, &tmp->flags); + else + detail->update(tmp, new); + tmp->next = *head; + *head = tmp; + cache_get(tmp); + write_unlock(&detail->hash_lock); + cache_fresh(detail, tmp, new->expiry_time); + cache_fresh(detail, old, 0); + detail->cache_put(old, detail); + return tmp; +} +EXPORT_SYMBOL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); /* -- cgit v1.2.3 From 4d90452cb23b08a9a9dd001010f0ee6b1ee83a45 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:07 -0800 Subject: [PATCH] knfsd: Remove DefineCacheLookup This has been replaced by more traditional code. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 113 ------------------------------------------- 1 file changed, 113 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 3e17a5ff1dea..afc481dd02dd 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -128,119 +128,6 @@ struct cache_deferred_req { int too_many); }; -/* - * just like a template in C++, this macro does cache lookup - * for us. - * The function is passed some sort of HANDLE from which a cache_detail - * structure can be determined (via SETUP, DETAIL), a template - * cache entry (type RTN*), and a "set" flag. Using the HASHFN and the - * TEST, the function will try to find a matching cache entry in the cache. - * If "set" == 0 : - * If an entry is found, it is returned - * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 : - * If no entry is found a new one is inserted with data from "template" - * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE - * If a CACHE_VALID entry is found, a new entry is swapped in with data - * from "template" - * - * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not - * run but insteead CACHE_NEGATIVE is set in any new item. - - * In any case, the new entry is returned with a reference count. - * - * - * RTN is a struct type for a cache entry - * MEMBER is the member of the cache which is cache_head, which must be first - * FNAME is the name for the function - * ARGS are arguments to function and must contain RTN *item, int set. May - * also contain something to be usedby SETUP or DETAIL to find cache_detail. - * SETUP locates the cache detail and makes it available as... 
- * DETAIL identifies the cache detail, possibly set up by SETUP - * HASHFN returns a hash value of the cache entry "item" - * TEST tests if "tmp" matches "item" - * INIT copies key information from "item" to "new" - * UPDATE copies content information from "item" to "tmp" - */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ -RTN *FNAME ARGS \ -{ \ - RTN *tmp, *new=NULL; \ - struct cache_head **hp, **head; \ - SETUP; \ - head = &(DETAIL)->hash_table[HASHFN]; \ - retry: \ - if (set||new) write_lock(&(DETAIL)->hash_lock); \ - else read_lock(&(DETAIL)->hash_lock); \ - for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) { \ - tmp = container_of(*hp, RTN, MEMBER); \ - if (TEST) { /* found a match */ \ - \ - if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ - break; \ - \ - if (new) \ - {INIT;} \ - if (set) { \ - if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ - { /* need to swap in new */ \ - RTN *t2; \ - \ - new->MEMBER.next = tmp->MEMBER.next; \ - *hp = &new->MEMBER; \ - tmp->MEMBER.next = NULL; \ - t2 = tmp; tmp = new; new = t2; \ - } \ - if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags)) \ - set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - else { \ - UPDATE; \ - clear_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - } \ - } \ - cache_get(&tmp->MEMBER); \ - if (set||new) write_unlock(&(DETAIL)->hash_lock); \ - else read_unlock(&(DETAIL)->hash_lock); \ - if (set) \ - cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ - if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ - return tmp; \ - } \ - } \ - /* Didn't find anything */ \ - if (new) { \ - INIT; \ - new->MEMBER.next = *head; \ - *head = &new->MEMBER; \ - (DETAIL)->entries ++; \ - cache_get(&new->MEMBER); \ - if (set) { \ - tmp = new; \ - if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags)) \ - set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - else {UPDATE;} \ - } \ - } \ - if (set||new) write_unlock(&(DETAIL)->hash_lock); \ - else read_unlock(&(DETAIL)->hash_lock); \ - if (new && set) \ - cache_fresh(DETAIL, &new->MEMBER, item->MEMBER.expiry_time); \ - if (new) \ - return new; \ - new = kmalloc(sizeof(*new), GFP_KERNEL); \ - if (new) { \ - cache_init(&new->MEMBER); \ - goto retry; \ - } \ - return NULL; \ -} - -#define DefineSimpleCacheLookup(STRUCT, FUNC) \ - DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ - & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item)) extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, -- cgit v1.2.3 From ebd0cb1af3be2729cc1f574681dfba01fcf458d9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:08 -0800 Subject: [PATCH] knfsd: Unexport cache_fresh and fix a small race Cache_fresh is now only used in cache.c, so unexport it. Part of cache_fresh (setting CACHE_VALID) should really be done under the lock, while part (calling cache_revisit_request etc) must be done outside the lock. So we split it up appropriately. 
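In schematic form (restating the change for clarity, not adding new code), the idiom inside cache.c becomes: mark the entry fresh while still holding hash_lock, remember whether CACHE_VALID was newly set, and run the notification side only after dropping the lock.

    write_lock(&detail->hash_lock);
    detail->update(old, new);
    is_new = cache_fresh_locked(old, new->expiry_time);  /* sets CACHE_VALID under the lock */
    write_unlock(&detail->hash_lock);
    cache_fresh_unlocked(old, detail, is_new);           /* revisit/queue outside the lock */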
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 2 -- net/sunrpc/cache.c | 51 ++++++++++++++++++++++++++------------------ net/sunrpc/sunrpc_syms.c | 1 - 3 files changed, 30 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index afc481dd02dd..a37fead1873b 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -165,8 +165,6 @@ static inline int cache_put(struct cache_head *h, struct cache_detail *cd) } extern void cache_init(struct cache_head *h); -extern void cache_fresh(struct cache_detail *detail, - struct cache_head *head, time_t expiry); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b242f491cea9..edcda4fd88e8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -96,6 +96,27 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, } EXPORT_SYMBOL(sunrpc_cache_lookup); + +static void queue_loose(struct cache_detail *detail, struct cache_head *ch); + +static int cache_fresh_locked(struct cache_head *head, time_t expiry) +{ + head->expiry_time = expiry; + head->last_refresh = get_seconds(); + return !test_and_set_bit(CACHE_VALID, &head->flags); +} + +static void cache_fresh_unlocked(struct cache_head *head, + struct cache_detail *detail, int new) +{ + if (new) + cache_revisit_request(head); + if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { + cache_revisit_request(head); + queue_loose(detail, head); + } +} + struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash) { @@ -105,6 +126,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, */ struct cache_head **head; struct cache_head *tmp; + int is_new; if (!test_bit(CACHE_VALID, &old->flags)) { write_lock(&detail->hash_lock); @@ -113,9 +135,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); - /* FIXME cache_fresh should come first */ + is_new = cache_fresh_locked(old, new->expiry_time); write_unlock(&detail->hash_lock); - cache_fresh(detail, old, new->expiry_time); + cache_fresh_unlocked(old, detail, is_new); return old; } write_unlock(&detail->hash_lock); @@ -138,9 +160,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, tmp->next = *head; *head = tmp; cache_get(tmp); + is_new = cache_fresh_locked(tmp, new->expiry_time); + cache_fresh_locked(old, 0); write_unlock(&detail->hash_lock); - cache_fresh(detail, tmp, new->expiry_time); - cache_fresh(detail, old, 0); + cache_fresh_unlocked(tmp, detail, is_new); + cache_fresh_unlocked(old, detail, 0); detail->cache_put(old, detail); return tmp; } @@ -192,7 +216,8 @@ int cache_check(struct cache_detail *detail, clear_bit(CACHE_PENDING, &h->flags); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_unlocked(h, detail, + cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY)); rv = -ENOENT; } break; @@ -213,22 +238,6 @@ int cache_check(struct cache_detail *detail, return rv; } -static void queue_loose(struct cache_detail *detail, struct cache_head *ch); - -void cache_fresh(struct cache_detail *detail, - struct cache_head *head, time_t expiry) -{ - - head->expiry_time = expiry; - 
head->last_refresh = get_seconds(); - if (!test_and_set_bit(CACHE_VALID, &head->flags)) - cache_revisit_request(head); - if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { - cache_revisit_request(head); - queue_loose(detail, head); - } -} - /* * caches need to be periodically cleaned. * For this we maintain a list of cache_detail and diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 40401196e7de..69b8238f3d10 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup); EXPORT_SYMBOL(cache_check); EXPORT_SYMBOL(cache_flush); EXPORT_SYMBOL(cache_purge); -EXPORT_SYMBOL(cache_fresh); EXPORT_SYMBOL(cache_init); EXPORT_SYMBOL(cache_register); EXPORT_SYMBOL(cache_unregister); -- cgit v1.2.3 From baab935ff3bdac20c558809da0d8e8f761840219 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:09 -0800 Subject: [PATCH] knfsd: Convert sunrpc_cache to use krefs .. it makes some of the code nicer. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 51 ++++++++++++++++++--------------------- fs/nfsd/nfs4idmap.c | 18 ++++++-------- fs/nfsd/nfsfh.c | 2 +- include/linux/nfsd/export.h | 4 +-- include/linux/sunrpc/cache.h | 13 +++++----- net/sunrpc/auth_gss/svcauth_gss.c | 28 +++++++++------------ net/sunrpc/cache.c | 20 +++++++-------- net/sunrpc/svcauth_unix.c | 20 +++++++-------- 8 files changed, 72 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index abd68965822f..cc811a1094cb 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,18 +57,17 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -void expkey_put(struct cache_head *item, struct cache_detail *cd) +void expkey_put(struct kref *ref) { - if (cache_put(item, cd)) { - struct svc_expkey *key = container_of(item, struct svc_expkey, h); - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) { - dput(key->ek_dentry); - mntput(key->ek_mnt); - } - auth_domain_put(key->ek_client); - kfree(key); + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + if (test_bit(CACHE_VALID, &key->h.flags) && + !test_bit(CACHE_NEGATIVE, &key->h.flags)) { + dput(key->ek_dentry); + mntput(key->ek_mnt); } + auth_domain_put(key->ek_client); + kfree(key); } static void expkey_request(struct cache_detail *cd, @@ -158,7 +157,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) set_bit(CACHE_NEGATIVE, &key.h.flags); ek = svc_expkey_update(&key, ek); if (ek) - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; } else { struct nameidata nd; @@ -172,7 +171,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) ek = svc_expkey_update(&key, ek); if (ek) - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; path_release(&nd); @@ -318,15 +317,13 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) static struct cache_head *export_table[EXPORT_HASHMAX]; -void svc_export_put(struct cache_head *item, struct cache_detail *cd) +static void svc_export_put(struct kref *ref) { - if (cache_put(item, cd)) { - struct svc_export *exp = container_of(item, struct svc_export, h); - dput(exp->ex_dentry); - mntput(exp->ex_mnt); - auth_domain_put(exp->ex_client); - kfree(exp); - } + 
struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + dput(exp->ex_dentry); + mntput(exp->ex_mnt); + auth_domain_put(exp->ex_client); + kfree(exp); } static void svc_export_request(struct cache_detail *cd, @@ -633,7 +630,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, if (ek) ek = svc_expkey_update(&key,ek); if (ek) { - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); return 0; } return -ENOMEM; @@ -762,7 +759,7 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (ek && !IS_ERR(ek)) { ek->h.expiry_time = get_seconds()-1; - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); } svc_expkey_cache.nextcheck = get_seconds(); } @@ -800,7 +797,7 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (ek && !IS_ERR(ek)) { ek->h.expiry_time = get_seconds()-1; - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); } svc_expkey_cache.nextcheck = get_seconds(); } @@ -902,7 +899,7 @@ finish: if (exp) exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) - expkey_put(&fsid_key->h, &svc_expkey_cache); + cache_put(&fsid_key->h, &svc_expkey_cache); if (clp) auth_domain_put(clp); path_release(&nd); @@ -1030,7 +1027,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, return ERR_PTR(PTR_ERR(ek)); exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); if (!exp || IS_ERR(exp)) return ERR_PTR(PTR_ERR(exp)); @@ -1068,7 +1065,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, else rv = fh_compose(fhp, exp, fsid_key->ek_dentry, NULL); - expkey_put(&fsid_key->h, &svc_expkey_cache); + cache_put(&fsid_key->h, &svc_expkey_cache); return rv; } @@ -1187,7 +1184,7 @@ static int e_show(struct seq_file *m, void *p) cache_get(&exp->h); if (cache_check(&svc_export_cache, &exp->h, NULL)) return 0; - if (cache_put(&exp->h, &svc_export_cache)) BUG(); + cache_put(&exp->h, &svc_export_cache); return svc_export_show(m, &svc_export_cache, cp); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 75cfbb68b205..4b6aa60dfceb 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -96,12 +96,10 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) } static void -ent_put(struct cache_head *ch, struct cache_detail *cd) +ent_put(struct kref *ref) { - if (cache_put(ch, cd)) { - struct ent *map = container_of(ch, struct ent, h); - kfree(map); - } + struct ent *map = container_of(ref, struct ent, h.ref); + kfree(map); } static struct cache_head * @@ -270,7 +268,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) if (res == NULL) goto out; - ent_put(&res->h, &idtoname_cache); + cache_put(&res->h, &idtoname_cache); error = 0; out: @@ -433,7 +431,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) if (res == NULL) goto out; - ent_put(&res->h, &nametoid_cache); + cache_put(&res->h, &nametoid_cache); error = 0; out: kfree(buf1); @@ -562,7 +560,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_put; return 0; out_put: - ent_put(&(*item)->h, detail); + cache_put(&(*item)->h, detail); out_err: *item = NULL; return ret; @@ -613,7 +611,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen if (ret) return ret; *id = item->id; - ent_put(&item->h, &nametoid_cache); + cache_put(&item->h, 
&nametoid_cache); return 0; } @@ -635,7 +633,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) ret = strlen(item->name); BUG_ON(ret > IDMAP_NAMESZ); memcpy(name, item->name, ret); - ent_put(&item->h, &idtoname_cache); + cache_put(&item->h, &idtoname_cache); return ret; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7a3e397b4ed3..3f2ec2e6d06c 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -506,7 +506,7 @@ fh_put(struct svc_fh *fhp) nfsd_nr_put++; } if (exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); fhp->fh_export = NULL; } return; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index d52e0b7ad37b..a6c08a47b25c 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -102,13 +102,11 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern void expkey_put(struct cache_head *item, struct cache_detail *cd); -extern void svc_export_put(struct cache_head *item, struct cache_detail *cd); extern struct cache_detail svc_export_cache, svc_expkey_cache; static inline void exp_put(struct svc_export *exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); } static inline void exp_get(struct svc_export *exp) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index a37fead1873b..ad3f5cbdb770 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -50,7 +50,7 @@ struct cache_head { time_t last_refresh; /* If CACHE_PENDING, this is when upcall * was sent, else this is when update was received */ - atomic_t refcnt; + struct kref ref; unsigned long flags; }; #define CACHE_VALID 0 /* Entry contains valid data */ @@ -68,8 +68,7 @@ struct cache_detail { atomic_t inuse; /* active user-space update or lookup */ char *name; - void (*cache_put)(struct cache_head *, - struct cache_detail*); + void (*cache_put)(struct kref *); void (*cache_request)(struct cache_detail *cd, struct cache_head *h, @@ -151,17 +150,17 @@ extern void cache_clean_deferred(void *owner); static inline struct cache_head *cache_get(struct cache_head *h) { - atomic_inc(&h->refcnt); + kref_get(&h->ref); return h; } -static inline int cache_put(struct cache_head *h, struct cache_detail *cd) +static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->refcnt) <= 2 && + if (atomic_read(&h->ref.refcount) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; - return atomic_dec_and_test(&h->refcnt); + kref_put(&h->ref, cd->cache_put); } extern void cache_init(struct cache_head *h); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 380152603d1e..4d7eb9e704da 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -89,13 +89,11 @@ static void rsi_free(struct rsi *rsii) kfree(rsii->out_token.data); } -static void rsi_put(struct cache_head *item, struct cache_detail *cd) +static void rsi_put(struct kref *ref) { - struct rsi *rsii = container_of(item, struct rsi, h); - if (cache_put(item, cd)) { - rsi_free(rsii); - kfree(rsii); - } + struct rsi *rsii = container_of(ref, struct rsi, h.ref); + rsi_free(rsii); + kfree(rsii); } static inline int rsi_hash(struct rsi *item) @@ -267,7 +265,7 @@ static int rsi_parse(struct cache_detail *cd, out: rsi_free(&rsii); if (rsip) - rsi_put(&rsip->h, &rsi_cache); + 
cache_put(&rsip->h, &rsi_cache); else status = -ENOMEM; return status; @@ -357,14 +355,12 @@ static void rsc_free(struct rsc *rsci) put_group_info(rsci->cred.cr_group_info); } -static void rsc_put(struct cache_head *item, struct cache_detail *cd) +static void rsc_put(struct kref *ref) { - struct rsc *rsci = container_of(item, struct rsc, h); + struct rsc *rsci = container_of(ref, struct rsc, h.ref); - if (cache_put(item, cd)) { - rsc_free(rsci); - kfree(rsci); - } + rsc_free(rsci); + kfree(rsci); } static inline int @@ -509,7 +505,7 @@ static int rsc_parse(struct cache_detail *cd, out: rsc_free(&rsci); if (rscp) - rsc_put(&rscp->h, &rsc_cache); + cache_put(&rscp->h, &rsc_cache); else status = -ENOMEM; return status; @@ -1076,7 +1072,7 @@ drop: ret = SVC_DROP; out: if (rsci) - rsc_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, &rsc_cache); return ret; } @@ -1168,7 +1164,7 @@ out_err: put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd->rsci) - rsc_put(&gsd->rsci->h, &rsc_cache); + cache_put(&gsd->rsci->h, &rsc_cache); gsd->rsci = NULL; return stat; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index edcda4fd88e8..dd81e5928172 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ void cache_init(struct cache_head *h) time_t now = get_seconds(); h->next = NULL; h->flags = 0; - atomic_set(&h->refcnt, 1); + kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; h->last_refresh = now; } @@ -81,7 +81,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (detail->match(tmp, key)) { cache_get(tmp); write_unlock(&detail->hash_lock); - detail->cache_put(new, detail); + cache_put(new, detail); return tmp; } } @@ -145,7 +145,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, /* We need to insert a new entry */ tmp = detail->alloc(); if (!tmp) { - detail->cache_put(old, detail); + cache_put(old, detail); return NULL; } cache_init(tmp); @@ -165,7 +165,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, write_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail, is_new); cache_fresh_unlocked(old, detail, 0); - detail->cache_put(old, detail); + cache_put(old, detail); return tmp; } EXPORT_SYMBOL(sunrpc_cache_update); @@ -234,7 +234,7 @@ int cache_check(struct cache_detail *detail, cache_defer_req(rqstp, h); if (rv) - detail->cache_put(h, detail); + cache_put(h, detail); return rv; } @@ -431,7 +431,7 @@ static int cache_clean(void) if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) queue_loose(current_detail, ch); - if (atomic_read(&ch->refcnt) == 1) + if (atomic_read(&ch->ref.refcount) == 1) break; } if (ch) { @@ -446,7 +446,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) - d->cache_put(ch, d); + cache_put(ch, d); } else spin_unlock(&cache_list_lock); @@ -723,7 +723,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) !test_bit(CACHE_PENDING, &rq->item->flags)) { list_del(&rq->q.list); spin_unlock(&queue_lock); - cd->cache_put(rq->item, cd); + cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else @@ -906,7 +906,7 @@ static void queue_loose(struct cache_detail *detail, struct cache_head *ch) continue; list_del(&cr->q.list); spin_unlock(&queue_lock); - detail->cache_put(cr->item, detail); + cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); return; @@ -1192,7 +1192,7 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - 
cp->expiry_time, atomic_read(&cp->refcnt), cp->flags); + cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7e38621a20b7..11020c0b7db5 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -84,15 +84,15 @@ struct ip_map { }; static struct cache_head *ip_table[IP_HASHMAX]; -static void ip_map_put(struct cache_head *item, struct cache_detail *cd) +static void ip_map_put(struct kref *kref) { + struct cache_head *item = container_of(kref, struct cache_head, ref); struct ip_map *im = container_of(item, struct ip_map,h); - if (cache_put(item, cd)) { - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - auth_domain_put(&im->m_client->h); - kfree(im); - } + + if (test_bit(CACHE_VALID, &item->flags) && + !test_bit(CACHE_NEGATIVE, &item->flags)) + auth_domain_put(&im->m_client->h); + kfree(im); } #if IP_HASHBITS == 8 @@ -315,7 +315,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex hash_ip((unsigned long)ipm->m_addr.s_addr)); if (!ch) return -ENOMEM; - ip_map_put(ch, &ip_map_cache); + cache_put(ch, &ip_map_cache); return 0; } @@ -369,7 +369,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) rv = &ipm->m_client->h; kref_get(&rv->ref); } - ip_map_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, &ip_map_cache); return rv; } @@ -403,7 +403,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); - ip_map_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, &ip_map_cache); break; } return SVC_OK; -- cgit v1.2.3 From 74cae61ab45f19a3e8c4d9f53c0e94df129c7915 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Mar 2006 01:15:10 -0800 Subject: [PATCH] fs/nfsd/export.c,net/sunrpc/cache.c: make needlessly global code static We can now make some code static. 
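One practical detail of narrowing linkage this way: a symbol that is referenced above its definition needs a file-local forward declaration once it becomes static, which is why the patch below adds one for svc_expkey_cache in export.c. A minimal stand-alone sketch of the pattern, using invented names rather than the real sunrpc types:

struct demo_cache {
	const char *name;
};

static struct demo_cache demo;			/* forward (tentative) definition */

static int demo_in_use(void)
{
	return demo.name != 0;			/* referenced before the initialized definition below */
}

static struct demo_cache demo = {
	.name = "demo",
};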
Signed-off-by: Adrian Bunk Cc: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 13 ++++++++----- include/linux/nfsd/export.h | 5 +---- include/linux/sunrpc/cache.h | 1 - net/sunrpc/cache.c | 2 +- net/sunrpc/sunrpc_syms.c | 1 - 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cc811a1094cb..c340be0a3f59 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,7 +57,7 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -void expkey_put(struct kref *ref) +static void expkey_put(struct kref *ref) { struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); @@ -87,6 +87,8 @@ static void expkey_request(struct cache_detail *cd, static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); +static struct cache_detail svc_expkey_cache; + static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client fsidtype fsid [path] */ @@ -255,7 +257,7 @@ static struct cache_head *expkey_alloc(void) return NULL; } -struct cache_detail svc_expkey_cache = { +static struct cache_detail svc_expkey_cache = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .hash_table = expkey_table, @@ -345,7 +347,8 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); +static struct svc_export *svc_export_update(struct svc_export *new, + struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); static int check_export(struct inode *inode, int flags) @@ -574,7 +577,7 @@ svc_export_lookup(struct svc_export *exp) return NULL; } -struct svc_export * +static struct svc_export * svc_export_update(struct svc_export *new, struct svc_export *old) { struct cache_head *ch; @@ -593,7 +596,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) } -struct svc_expkey * +static struct svc_expkey * exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index a6c08a47b25c..d2a8abb5011a 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -86,9 +86,6 @@ void nfsd_export_shutdown(void); void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); -struct svc_expkey * exp_find_key(struct auth_domain *clp, - int fsid_type, u32 *fsidv, - struct cache_req *reqp); struct svc_export * exp_get_by_name(struct auth_domain *clp, struct vfsmount *mnt, struct dentry *dentry, @@ -102,7 +99,7 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern struct cache_detail svc_export_cache, svc_expkey_cache; +extern struct cache_detail svc_export_cache; static inline void exp_put(struct svc_export *exp) { diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ad3f5cbdb770..b5612c958cce 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -163,7 +163,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -extern void cache_init(struct cache_head *h); extern 
int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index dd81e5928172..3ac4193a78ed 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -37,7 +37,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); -void cache_init(struct cache_head *h) +static void cache_init(struct cache_head *h) { time_t now = get_seconds(); h->next = NULL; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 69b8238f3d10..769114f0f886 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup); EXPORT_SYMBOL(cache_check); EXPORT_SYMBOL(cache_flush); EXPORT_SYMBOL(cache_purge); -EXPORT_SYMBOL(cache_init); EXPORT_SYMBOL(cache_register); EXPORT_SYMBOL(cache_unregister); EXPORT_SYMBOL(qword_add); -- cgit v1.2.3 From 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 27 Mar 2006 01:15:22 -0800 Subject: [PATCH] sched: new sched domain for representing multi-core Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with last level cache shared between cores with in a package. If there are two runnable processes, with this appended patch those two processes will be scheduled on different packages. On such systems, with this patch we have observed 8% perf improvement with specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2 users). This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by domain degeneration code. This new domain can be also used for implementing power savings policy (see OLS 2005 CMP kernel scheduler paper for more details.. I will post another patch for power savings policy soon) Most of the arch/* file changes are for cpu_coregroup_map() implementation. Signed-off-by: Suresh Siddha Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 9 +++++ arch/i386/kernel/cpu/common.c | 10 +++-- arch/i386/kernel/cpu/intel_cacheinfo.c | 22 +++++++++- arch/i386/kernel/smpboot.c | 24 +++++++++++ arch/x86_64/Kconfig | 9 +++++ arch/x86_64/kernel/setup.c | 3 +- arch/x86_64/kernel/smpboot.c | 24 +++++++++++ include/asm-i386/processor.h | 5 +++ include/asm-i386/topology.h | 2 + include/asm-x86_64/processor.h | 4 ++ include/asm-x86_64/smp.h | 1 + include/asm-x86_64/topology.h | 2 + include/linux/topology.h | 9 +++++ kernel/sched.c | 73 +++++++++++++++++++++++++++++++--- 14 files changed, 186 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f7db71d0b913..f17bd1d2707e 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -231,6 +231,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. 
+ source "kernel/Kconfig.preempt" config X86_UP_APIC diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6a4e96..a06a49075f10 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_SMP + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921369e5..7e7fd4e67dd0 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_SMP + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif if (c->cpuid_level > 3) { static int is_initialized; @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) break; case 2: new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; break; case 3: new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; break; default: break; @@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l1i) l1i = new_l1i; - if (new_l2) + if (new_l2) { l2 = new_l2; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l2_id; +#endif + } - if (new_l3) + if (new_l3) { l3 = new_l3; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l3_id; +#endif + } if ( trace ) printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d83bfa9..a6969903f2d6 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* Last level cache ID of each logical CPU */ +int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... 
NR_CPUS-1] = BAD_APICID}; + /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -440,6 +443,18 @@ static void __devinit smp_callin(void) static int cpucount; +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 45efe0ca88f8..1cb4aa241c8c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -250,6 +250,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + source "kernel/Kconfig.preempt" config NUMA diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a57eec8311a7..d1f3e9272c05 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id(0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } + c->apicid = phys_pkg_id(0); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 66e98659d077..ea48fa638070 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* Last level cache ID of each logical CPU */ +u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... 
NR_CPUS-1] = BAD_APICID}; + /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index feca5d961e2b..af4bfd012475 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include /* flag for disabling the tsc */ extern int tsc_disable; @@ -67,6 +68,9 @@ struct cpuinfo_x86 { char pad0; int x86_power; unsigned long loops_per_jiffy; +#ifdef CONFIG_SMP + cpumask_t llc_shared_map; /* cpus sharing the last level cache */ +#endif unsigned char x86_max_cores; /* cpuid returned max cores value */ unsigned char booted_cores; /* number of cores as seen by OS */ unsigned char apicid; @@ -103,6 +107,7 @@ extern struct cpuinfo_x86 cpu_data[]; extern int phys_proc_id[NR_CPUS]; extern int cpu_core_id[NR_CPUS]; +extern int cpu_llc_id[NR_CPUS]; extern char ignore_fpu_irq; extern void identify_cpu(struct cpuinfo_x86 *); diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index aa958c6ee83e..b94e5eeef917 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -112,4 +112,6 @@ extern unsigned long node_remap_size[]; #endif /* CONFIG_NUMA */ +extern cpumask_t cpu_coregroup_map(int cpu); + #endif /* _ASM_I386_TOPOLOGY_H */ diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 8c8d88c036ed..1aa2cee43344 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include #define TF_MASK 0x00000100 #define IF_MASK 0x00000200 @@ -65,6 +66,9 @@ struct cpuinfo_x86 { __u32 x86_power; __u32 extended_cpuid_level; /* Max extended CPUID function supported */ unsigned long loops_per_jiffy; +#ifdef CONFIG_SMP + cpumask_t llc_shared_map; /* cpus sharing the last level cache */ +#endif __u8 apicid; __u8 booted_cores; /* number of cores as seen by OS */ } ____cacheline_aligned; diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 9ccbb2cfd5c0..a4fdaeb5c397 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -56,6 +56,7 @@ extern cpumask_t 
cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; extern u8 phys_proc_id[NR_CPUS]; extern u8 cpu_core_id[NR_CPUS]; +extern u8 cpu_llc_id[NR_CPUS]; #define SMP_TRAMPOLINE_BASE 0x6000 diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index c642f5d9882d..9db54e9d17bb 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -68,4 +68,6 @@ extern int __node_distance(int, int); #include +extern cpumask_t cpu_coregroup_map(int cpu); + #endif diff --git a/include/linux/topology.h b/include/linux/topology.h index e8eb0040ce3a..a305ae2e44b6 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -164,6 +164,15 @@ .nr_balance_failed = 0, \ } +#ifdef CONFIG_SCHED_MC +#ifndef SD_MC_INIT +/* for now its same as SD_CPU_INIT. + * TBD: Tune Domain parameters! + */ +#define SD_MC_INIT SD_CPU_INIT +#endif +#endif + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! diff --git a/kernel/sched.c b/kernel/sched.c index a96a05d23262..8a8b71b5751b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5574,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu) } #endif +#ifdef CONFIG_SCHED_MC +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static struct sched_group sched_group_core[NR_CPUS]; +#endif + +#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) +static int cpu_to_core_group(int cpu) +{ + return first_cpu(cpu_sibling_map[cpu]); +} +#elif defined(CONFIG_SCHED_MC) +static int cpu_to_core_group(int cpu) +{ + return cpu; +} +#endif + static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; static int cpu_to_phys_group(int cpu) { -#ifdef CONFIG_SCHED_SMT +#if defined(CONFIG_SCHED_MC) + cpumask_t mask = cpu_coregroup_map(cpu); + return first_cpu(mask); +#elif defined(CONFIG_SCHED_SMT) return first_cpu(cpu_sibling_map[cpu]); #else return cpu; @@ -5676,6 +5696,17 @@ void build_sched_domains(const cpumask_t *cpu_map) sd->parent = p; sd->groups = &sched_group_phys[group]; +#ifdef CONFIG_SCHED_MC + p = sd; + sd = &per_cpu(core_domains, i); + group = cpu_to_core_group(i); + *sd = SD_MC_INIT; + sd->span = cpu_coregroup_map(i); + cpus_and(sd->span, sd->span, *cpu_map); + sd->parent = p; + sd->groups = &sched_group_core[group]; +#endif + #ifdef CONFIG_SCHED_SMT p = sd; sd = &per_cpu(cpu_domains, i); @@ -5701,6 +5732,19 @@ void build_sched_domains(const cpumask_t *cpu_map) } #endif +#ifdef CONFIG_SCHED_MC + /* Set up multi-core groups */ + for_each_cpu_mask(i, *cpu_map) { + cpumask_t this_core_map = cpu_coregroup_map(i); + cpus_and(this_core_map, this_core_map, *cpu_map); + if (i != first_cpu(this_core_map)) + continue; + init_sched_build_groups(sched_group_core, this_core_map, + &cpu_to_core_group); + } +#endif + + /* Set up physical groups */ for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); @@ -5797,11 +5841,31 @@ void build_sched_domains(const cpumask_t *cpu_map) power = SCHED_LOAD_SCALE; sd->groups->cpu_power = power; #endif +#ifdef CONFIG_SCHED_MC + sd = &per_cpu(core_domains, i); + power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) + * SCHED_LOAD_SCALE / 10; + sd->groups->cpu_power = power; + + sd = &per_cpu(phys_domains, i); + /* + * This has to be < 2 * SCHED_LOAD_SCALE + * Lets keep it SCHED_LOAD_SCALE, so that + * while calculating NUMA group's cpu_power + * we can simply do + * numa_group->cpu_power += phys_group->cpu_power; + * + * See "only add power once 
for each physical pkg" + * comment below + */ + sd->groups->cpu_power = SCHED_LOAD_SCALE; +#else sd = &per_cpu(phys_domains, i); power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * (cpus_weight(sd->groups->cpumask)-1) / 10; sd->groups->cpu_power = power; +#endif #ifdef CONFIG_NUMA sd = &per_cpu(allnodes_domains, i); @@ -5823,7 +5887,6 @@ void build_sched_domains(const cpumask_t *cpu_map) next_sg: for_each_cpu_mask(j, sg->cpumask) { struct sched_domain *sd; - int power; sd = &per_cpu(phys_domains, j); if (j != first_cpu(sd->groups->cpumask)) { @@ -5833,10 +5896,8 @@ next_sg: */ continue; } - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sg->cpu_power += power; + sg->cpu_power += sd->groups->cpu_power; } sg = sg->next; if (sg != sched_group_nodes[i]) @@ -5849,6 +5910,8 @@ next_sg: struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); +#elif defined(CONFIG_SCHED_MC) + sd = &per_cpu(core_domains, i); #else sd = &per_cpu(phys_domains, i); #endif -- cgit v1.2.3 From a117e66ed45ac0569c039ea60bd7a9a61e031858 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:25 -0800 Subject: [PATCH] unify pfn_to_page: generic functions There are 3 memory models, FLATMEM, DISCONTIGMEM, SPARSEMEM. Each arch has its own page_to_pfn(), pfn_to_page() for each models. But most of them can use the same arithmetic. This patch adds asm-generic/memory_model.h, which includes generic page_to_pfn(), pfn_to_page() definitions for each memory model. When CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y, out-of-line functions are used instead of macro. This is enabled by some archs and reduces text size. Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Andi Kleen Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Ian Molton Cc: Mikael Starvik Cc: David Howells Cc: Yoshinori Sato Cc: Hirokazu Takata Cc: Ralf Baechle Cc: Kyle McMartin Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Chris Zankel Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/memory_model.h | 77 ++++++++++++++++++++++++++++++++++++++ include/asm-sparc64/page.h | 2 + include/linux/mmzone.h | 11 ------ mm/page_alloc.c | 42 +++++++++++++++++++++ 4 files changed, 121 insertions(+), 11 deletions(-) create mode 100644 include/asm-generic/memory_model.h (limited to 'include/linux') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h new file mode 100644 index 000000000000..a7bb4978e808 --- /dev/null +++ b/include/asm-generic/memory_model.h @@ -0,0 +1,77 @@ +#ifndef __ASM_MEMORY_MODEL_H +#define __ASM_MEMORY_MODEL_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#if defined(CONFIG_FLATMEM) + +#ifndef ARCH_PFN_OFFSET +#define ARCH_PFN_OFFSET (0UL) +#endif + +#elif defined(CONFIG_DISCONTIGMEM) + +#ifndef arch_pfn_to_nid +#define arch_pfn_to_nid(pfn) pfn_to_nid(pfn) +#endif + +#ifndef arch_local_page_offset +#define arch_local_page_offset(pfn, nid) \ + ((pfn) - NODE_DATA(nid)->node_start_pfn) +#endif + +#endif /* CONFIG_DISCONTIGMEM */ + +#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE +struct page; +/* this is useful when inlined pfn_to_page is too big */ +extern struct page *pfn_to_page(unsigned long pfn); +extern unsigned long page_to_pfn(struct page *page); +#else +/* + * supports 3 memory models. 
+ */ +#if defined(CONFIG_FLATMEM) + +#define pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + \ + ARCH_PFN_OFFSET) +#elif defined(CONFIG_DISCONTIGMEM) + +#define pfn_to_page(pfn) \ +({ unsigned long __pfn = (pfn); \ + unsigned long __nid = arch_pfn_to_nid(pfn); \ + NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\ +}) + +#define page_to_pfn(pg) \ +({ struct page *__pg = (pg); \ + struct zone *__zone = page_zone(__pg); \ + (unsigned long)(__pg - __zone->zone_mem_map) + \ + __zone->zone_start_pfn; \ +}) + +#elif defined(CONFIG_SPARSEMEM) +/* + * Note: section's mem_map is encorded to reflect its start_pfn. + * section[i].section_mem_map == mem_map's address - start_pfn; + */ +#define page_to_pfn(pg) \ +({ struct page *__pg = (pg); \ + int __sec = page_to_section(__pg); \ + __pg - __section_mem_map_addr(__nr_to_section(__sec)); \ +}) + +#define pfn_to_page(pfn) \ +({ unsigned long __pfn = (pfn); \ + struct mem_section *__sec = __pfn_to_section(__pfn); \ + __section_mem_map_addr(__sec) + __pfn; \ +}) +#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */ +#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 66fe4ac59fd6..aabb21906724 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -111,6 +111,8 @@ typedef unsigned long pgprot_t; (_AC(0x0000000070000000,UL)) : \ (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) +#include + #endif /* !(__ASSEMBLY__) */ /* to align the pointer to the (next) page boundary */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ebfc238cc243..0c1c0c0cce65 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -602,17 +602,6 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) return __nr_to_section(pfn_to_section_nr(pfn)); } -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = (pfn); \ - __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ -}) -#define page_to_pfn(page) \ -({ \ - page - __section_mem_map_addr(__nr_to_section( \ - page_to_section(page))); \ -}) - static inline int pfn_valid(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 338a02bb004d..349b328763b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2745,3 +2745,45 @@ void *__init alloc_large_system_hash(const char *tablename, return table; } + +#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE +/* + * pfn <-> page translation. out-of-line version. 
+ * (see asm-generic/memory_model.h) + */ +#if defined(CONFIG_FLATMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + return mem_map + (pfn - ARCH_PFN_OFFSET); +} +unsigned long page_to_pfn(struct page *page) +{ + return (page - mem_map) + ARCH_PFN_OFFSET; +} +#elif defined(CONFIG_DISCONTIGMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + int nid = arch_pfn_to_nid(pfn); + return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid); +} +unsigned long page_to_pfn(struct page *page) +{ + struct zone *zone = page_zone(page); + return (page - zone->zone_mem_map) + zone->zone_start_pfn; + +} +#elif defined(CONFIG_SPARSEMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn; +} + +unsigned long page_to_pfn(struct page *page) +{ + long section_id = page_to_section(page); + return page - __section_mem_map_addr(__nr_to_section(section_id)); +} +#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */ +EXPORT_SYMBOL(pfn_to_page); +EXPORT_SYMBOL(page_to_pfn); +#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ -- cgit v1.2.3 From a0140c1d85637ee5f4ea7c78f066e3611a6a79dc Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:55 -0800 Subject: [PATCH] remove zone_mem_map This patch removes zone_mem_map. pfn_to_page uses pgdat, page_to_pfn uses zone. page_to_pfn can use pgdat instead of zone, which is only one user of zone_mem_map. By modifing it, we can remove zone_mem_map. Signed-off-by: KAMEZAWA Hiroyuki Cc: Dave Hansen Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mmzone.h | 3 +-- include/asm-generic/memory_model.h | 10 +++++----- include/linux/mmzone.h | 1 - mm/page_alloc.c | 6 ++---- 4 files changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index c9004398f273..192d80c875b0 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -83,8 +83,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) pte_t pte; \ unsigned long pfn; \ \ - pfn = ((unsigned long)((page)-page_zone(page)->zone_mem_map)) << 32; \ - pfn += page_zone(page)->zone_start_pfn << 32; \ + pfn = page_to_pfn(page) << 32; \ pte_val(pte) = pfn | pgprot_val(pgprot); \ \ pte; \ diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index a7bb4978e808..0cfb086dd373 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -45,11 +45,11 @@ extern unsigned long page_to_pfn(struct page *page); NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\ }) -#define page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ - struct zone *__zone = page_zone(__pg); \ - (unsigned long)(__pg - __zone->zone_mem_map) + \ - __zone->zone_start_pfn; \ +#define page_to_pfn(pg) \ +({ struct page *__pg = (pg); \ + struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ + (unsigned long)(__pg - __pgdat->node_mem_map) + \ + __pgdat->node_start_pfn; \ }) #elif defined(CONFIG_SPARSEMEM) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c1c0c0cce65..ace31c515a8c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -225,7 +225,6 @@ struct zone { * Discontig memory support fields. 
*/ struct pglist_data *zone_pgdat; - struct page *zone_mem_map; /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 349b328763b7..8dc8f2735d22 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2042,7 +2042,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone, zone_wait_table_init(zone, size); pgdat->nr_zones = zone_idx(zone) + 1; - zone->zone_mem_map = pfn_to_page(zone_start_pfn); zone->zone_start_pfn = zone_start_pfn; memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); @@ -2768,9 +2767,8 @@ struct page *pfn_to_page(unsigned long pfn) } unsigned long page_to_pfn(struct page *page) { - struct zone *zone = page_zone(page); - return (page - zone->zone_mem_map) + zone->zone_start_pfn; - + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + return (page - pgdat->node_mem_map) + pgdat->node_start_pfn; } #elif defined(CONFIG_SPARSEMEM) struct page *pfn_to_page(unsigned long pfn) -- cgit v1.2.3 From 8357f8695d58b50fbf2bd507b4b0fc2cd1e43bd6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:57 -0800 Subject: [PATCH] define for_each_online_pgdat This patch defines for_each_online_pgdat() as a replacement of for_each_pgdat() Now, online nodes are managed by node_online_map. But for_each_pgdat() uses pgdat_link to iterate over all nodes(pgdat). This means management structure for online pgdat is duplicated. I think using node_online_map for for_each_pgdat() is simple and sane rather ather than pgdat_link. New macro is named as for_each_online_pgdat(). Following patch will fix callers of for_each_pgdat(). The bootmem allocater uses for_each_pgdat() before pgdat initialization. I don't think it's sane. Following patch will fix it. Signed-off-by: Yasunori Goto Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 108 +++++++++++++++++++++++++---------------------- include/linux/nodemask.h | 4 ++ 2 files changed, 61 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ace31c515a8c..96eb08025092 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -13,6 +13,7 @@ #include #include #include +#include #include /* Free memory management - zoned buddy allocator. */ @@ -349,57 +350,6 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) -/** - * for_each_pgdat - helper macro to iterate over all nodes - * @pgdat - pointer to a pg_data_t variable - * - * Meant to help with common loops of the form - * pgdat = pgdat_list; - * while(pgdat) { - * ... - * pgdat = pgdat->pgdat_next; - * } - */ -#define for_each_pgdat(pgdat) \ - for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else if (pgdat->pgdat_next) { - pgdat = pgdat->pgdat_next; - zone = pgdat->node_zones; - } else - zone = NULL; - - return zone; -} - -/** - * for_each_zone - helper macro to iterate over all memory zones - * @zone - pointer to struct zone variable - * - * The user only needs to declare the zone variable, for_each_zone - * fills it in. 
This basically means for_each_zone() is an - * easier to read version of this piece of code: - * - * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next) - * for (i = 0; i < MAX_NR_ZONES; ++i) { - * struct zone * z = pgdat->node_zones + i; - * ... - * } - * } - */ -#define for_each_zone(zone) \ - for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone)) - static inline int populated_zone(struct zone *zone) { return (!!zone->present_pages); @@ -471,6 +421,62 @@ extern struct pglist_data contig_page_data; #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline struct pglist_data *first_online_pgdat(void) +{ + return NODE_DATA(first_online_node); +} + +static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) +{ + int nid = next_online_node(pgdat->node_id); + + if (nid == MAX_NUMNODES) + return NULL; + return NODE_DATA(nid); +} + + +/** + * for_each_pgdat - helper macro to iterate over all nodes + * @pgdat - pointer to a pg_data_t variable + */ +#define for_each_online_pgdat(pgdat) \ + for (pgdat = first_online_pgdat(); \ + pgdat; \ + pgdat = next_online_pgdat(pgdat)) + +/* + * next_zone - helper magic for for_each_zone() + * Thanks to William Lee Irwin III for this piece of ingenuity. + */ +static inline struct zone *next_zone(struct zone *zone) +{ + pg_data_t *pgdat = zone->zone_pgdat; + + if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) + zone++; + else { + pgdat = next_online_pgdat(pgdat); + if (pgdat) + zone = pgdat->node_zones; + else + zone = NULL; + } + return zone; +} + +/** + * for_each_zone - helper macro to iterate over all memory zones + * @zone - pointer to struct zone variable + * + * The user only needs to declare the zone variable, for_each_zone + * fills it in. + */ +#define for_each_zone(zone) \ + for (zone = (first_online_pgdat())->node_zones; \ + zone; \ + zone = next_zone(zone)) + #ifdef CONFIG_SPARSEMEM #include #endif diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b959a4525cbd..1a9ef3e627d1 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -350,11 +350,15 @@ extern nodemask_t node_possible_map; #define num_possible_nodes() nodes_weight(node_possible_map) #define node_online(node) node_isset((node), node_online_map) #define node_possible(node) node_isset((node), node_possible_map) +#define first_online_node first_node(node_online_map) +#define next_online_node(nid) next_node((nid), node_online_map) #else #define num_online_nodes() 1 #define num_possible_nodes() 1 #define node_online(node) ((node) == 0) #define node_possible(node) ((node) == 0) +#define first_online_node 0 +#define next_online_node(nid) (MAX_NUMNODES) #endif #define any_online_node(mask) \ -- cgit v1.2.3 From 679bc9fbb508a0aac9539b2de747eb5849feb428 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:58 -0800 Subject: [PATCH] for_each_online_pgdat: for_each_bootmem Add a list_head to bootmem_data_t and make bootmems use it. bootmem list is sorted by node_boot_start. Only nodes against which init_bootmem() is called are linked to the list. (i386 allocates bootmem only from one node(0) not from all online nodes.) A summary: 1. for_each_online_pgdat() traverses all *online* nodes. 2. alloc_bootmem() allocates memory only from initialized-for-bootmem nodes. 
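As a usage sketch (not part of the patch; demo_spanned_pages() is an invented name), a caller now walks online nodes through the new macro instead of chasing pgdat_next pointers:

#include <linux/mmzone.h>

static unsigned long demo_spanned_pages(void)
{
	struct pglist_data *pgdat;
	unsigned long pages = 0;

	for_each_online_pgdat(pgdat)
		pages += pgdat->node_spanned_pages;

	return pages;
}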
Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + mm/bootmem.c | 39 +++++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 7155452fb4a8..de3eb8d8ae26 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -38,6 +38,7 @@ typedef struct bootmem_data { unsigned long last_pos; unsigned long last_success; /* Previous allocation point. To speed * up searching */ + struct list_head list; } bootmem_data_t; extern unsigned long __init bootmem_bootmap_pages (unsigned long); diff --git a/mm/bootmem.c b/mm/bootmem.c index b55bd39fc5dd..d3e3bd2ffcea 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -33,6 +33,7 @@ EXPORT_SYMBOL(max_pfn); /* This is exported so * dma_get_required_mask(), which uses * it, can be an inline function */ +static LIST_HEAD(bdata_list); #ifdef CONFIG_CRASH_DUMP /* * If we have booted due to a crash, max_pfn will be a very low value. We need @@ -52,6 +53,27 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages) return mapsize; } +/* + * link bdata in order + */ +static void link_bootmem(bootmem_data_t *bdata) +{ + bootmem_data_t *ent; + if (list_empty(&bdata_list)) { + list_add(&bdata->list, &bdata_list); + return; + } + /* insert in order */ + list_for_each_entry(ent, &bdata_list, list) { + if (bdata->node_boot_start < ent->node_boot_start) { + list_add_tail(&bdata->list, &ent->list); + return; + } + } + list_add_tail(&bdata->list, &bdata_list); + return; +} + /* * Called once to set up the allocator itself. @@ -62,13 +84,11 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, bootmem_data_t *bdata = pgdat->bdata; unsigned long mapsize = ((end - start)+7)/8; - pgdat->pgdat_next = pgdat_list; - pgdat_list = pgdat; - mapsize = ALIGN(mapsize, sizeof(long)); bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); bdata->node_boot_start = (start << PAGE_SHIFT); bdata->node_low_pfn = end; + link_bootmem(bdata); /* * Initially all pages are reserved - setup_arch() has to @@ -383,12 +403,11 @@ unsigned long __init free_all_bootmem (void) void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { - pg_data_t *pgdat = pgdat_list; + bootmem_data_t *bdata; void *ptr; - for_each_pgdat(pgdat) - if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal, 0))) + list_for_each_entry(bdata, &bdata_list, list) + if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) return(ptr); /* @@ -416,11 +435,11 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigne void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) { - pg_data_t *pgdat = pgdat_list; + bootmem_data_t *bdata; void *ptr; - for_each_pgdat(pgdat) - if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, + list_for_each_entry(bdata, &bdata_list, list) + if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, LOW32LIMIT))) return(ptr); -- cgit v1.2.3 From ae0f15fb91274e67d78836d38c99ec363df33073 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:01 -0800 Subject: [PATCH] for_each_online_pgdat: remove pgdat_list By using for_each_online_pgdat(), pgdat_list is not necessary now. This patch removes it. 
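A sketch of the resulting iteration style (not taken from the patch; demo_nth_online_pgdat() is a made-up helper), mirroring how the converted frag_start() steps through nodes with first_online_pgdat()/next_online_pgdat() instead of pgdat->pgdat_next:

#include <linux/mmzone.h>

static pg_data_t *demo_nth_online_pgdat(int n)
{
	pg_data_t *pgdat;

	for (pgdat = first_online_pgdat(); pgdat && n; pgdat = next_online_pgdat(pgdat))
		--n;

	return pgdat;	/* NULL if fewer than n + 1 nodes are online */
}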
Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 --- mm/page_alloc.c | 8 ++++---- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 96eb08025092..0d12c3cf1f86 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -307,7 +307,6 @@ typedef struct pglist_data { unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; - struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; struct task_struct *kswapd; int kswapd_max_order; @@ -324,8 +323,6 @@ typedef struct pglist_data { #include -extern struct pglist_data *pgdat_list; - void __get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free, struct pglist_data *pgdat); void get_zone_counts(unsigned long *active, unsigned long *inactive, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ccc3713dd407..dc523a1f270d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -49,7 +49,6 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } }; EXPORT_SYMBOL(node_online_map); nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); -struct pglist_data *pgdat_list __read_mostly; unsigned long totalram_pages __read_mostly; unsigned long totalhigh_pages __read_mostly; long nr_swap_pages; @@ -2169,8 +2168,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos) { pg_data_t *pgdat; loff_t node = *pos; - - for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next) + for (pgdat = first_online_pgdat(); + pgdat && node; + pgdat = next_online_pgdat(pgdat)) --node; return pgdat; @@ -2181,7 +2181,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) pg_data_t *pgdat = (pg_data_t *)arg; (*pos)++; - return pgdat->pgdat_next; + return next_online_pgdat(pgdat); } static void frag_stop(struct seq_file *m, void *arg) -- cgit v1.2.3 From 95144c788dc01b6a0ff2c9c2222e37ffdab358b8 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:02 -0800 Subject: [PATCH] uninline zone helpers Helper functions for for_each_online_pgdat/for_each_zone look too big to be inlined. Speed of these helper macro itself is not very important. (inner loops are tend to do more work than this) This patch make helper function to be out-of-lined. inline out-of-line .text 005c0680 005bf6a0 005c0680 - 005bf6a0 = FE0 = 4Kbytes. 
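Call sites are unchanged by the move; a sketch (demo_populated_zones() is an invented name) of a typical loop that now goes through the out-of-line, exported next_zone():

#include <linux/mmzone.h>

static int demo_populated_zones(void)
{
	struct zone *zone;
	int n = 0;

	for_each_zone(zone)
		if (populated_zone(zone))
			n++;

	return n;
}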
Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 38 +++----------------------------------- mm/Makefile | 2 +- mm/mmzone.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 36 deletions(-) create mode 100644 mm/mmzone.c (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0d12c3cf1f86..b5c21122c299 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -418,20 +418,9 @@ extern struct pglist_data contig_page_data; #endif /* !CONFIG_NEED_MULTIPLE_NODES */ -static inline struct pglist_data *first_online_pgdat(void) -{ - return NODE_DATA(first_online_node); -} - -static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) -{ - int nid = next_online_node(pgdat->node_id); - - if (nid == MAX_NUMNODES) - return NULL; - return NODE_DATA(nid); -} - +extern struct pglist_data *first_online_pgdat(void); +extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); +extern struct zone *next_zone(struct zone *zone); /** * for_each_pgdat - helper macro to iterate over all nodes @@ -441,27 +430,6 @@ static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) for (pgdat = first_online_pgdat(); \ pgdat; \ pgdat = next_online_pgdat(pgdat)) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else { - pgdat = next_online_pgdat(pgdat); - if (pgdat) - zone = pgdat->node_zones; - else - zone = NULL; - } - return zone; -} - /** * for_each_zone - helper macro to iterate over all memory zones * @zone - pointer to struct zone variable diff --git a/mm/Makefile b/mm/Makefile index f10c753dce6d..0b8f73f2ed16 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o \ readahead.o swap.o truncate.o vmscan.o \ - prio_tree.o util.o $(mmu-y) + prio_tree.o util.o mmzone.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o diff --git a/mm/mmzone.c b/mm/mmzone.c new file mode 100644 index 000000000000..b022370e612e --- /dev/null +++ b/mm/mmzone.c @@ -0,0 +1,50 @@ +/* + * linux/mm/mmzone.c + * + * management codes for pgdats and zones. 
+ */ + + +#include +#include +#include +#include + +struct pglist_data *first_online_pgdat(void) +{ + return NODE_DATA(first_online_node); +} + +EXPORT_SYMBOL(first_online_pgdat); + +struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) +{ + int nid = next_online_node(pgdat->node_id); + + if (nid == MAX_NUMNODES) + return NULL; + return NODE_DATA(nid); +} +EXPORT_SYMBOL(next_online_pgdat); + + +/* + * next_zone - helper magic for for_each_zone() + */ +struct zone *next_zone(struct zone *zone) +{ + pg_data_t *pgdat = zone->zone_pgdat; + + if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) + zone++; + else { + pgdat = next_online_pgdat(pgdat); + if (pgdat) + zone = pgdat->node_zones; + else + zone = NULL; + } + return zone; +} +EXPORT_SYMBOL(next_zone); + -- cgit v1.2.3 From 22a9835c350782a5c3257343713932af3ac92ee0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 27 Mar 2006 01:16:04 -0800 Subject: [PATCH] unify PFN_* macros Just about every architecture defines some macros to do operations on pfns. They're all virtually identical. This patch consolidates all of them. One minor glitch is that at least i386 uses them in a very skeletal header file. To keep away from #include dependency hell, I stuck the new definitions in a new, isolated header. Of all of the implementations, sh64 is the only one that varied by a bit. It used some masks to ensure that any sign-extension got ripped away before the arithmetic is done. This has been posted to that sh64 maintainers and the development list. Compiles on x86, x86_64, ia64 and ppc64. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/setup.c | 9 +-------- arch/alpha/mm/numa.c | 4 +--- arch/arm26/mm/init.c | 7 +------ arch/cris/kernel/setup.c | 5 +---- arch/i386/kernel/setup.c | 1 + arch/i386/mm/discontig.c | 1 + arch/m32r/kernel/setup.c | 1 + arch/m32r/mm/discontig.c | 1 + arch/m32r/mm/init.c | 1 + arch/mips/ite-boards/ivr/init.c | 3 --- arch/mips/ite-boards/qed-4n-s01b/init.c | 3 --- arch/mips/kernel/setup.c | 9 +-------- arch/mips/mips-boards/generic/memory.c | 7 ++----- arch/mips/mips-boards/sim/sim_mem.c | 7 ++----- arch/mips/mm/init.c | 4 +--- arch/mips/sgi-ip27/ip27-memory.c | 3 +-- arch/sh/kernel/setup.c | 5 +---- arch/sh64/kernel/setup.c | 1 + arch/um/kernel/physmem.c | 3 +-- include/asm-i386/setup.h | 4 +--- include/asm-m32r/setup.h | 4 ---- include/asm-sh64/platform.h | 5 ----- include/linux/pfn.h | 9 +++++++++ 23 files changed, 29 insertions(+), 68 deletions(-) create mode 100644 include/linux/pfn.h (limited to 'include/linux') diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index b4e5f8ff2b25..9402624453c2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -34,6 +34,7 @@ #include #include #include +#include #ifdef CONFIG_MAGIC_SYSRQ #include #include @@ -241,9 +242,6 @@ reserve_std_resources(void) request_resource(io, standard_io_resources+i); } -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) #define PFN_MAX PFN_DOWN(0x80000000) #define for_each_mem_cluster(memdesc, cluster, i) \ for ((cluster) = (memdesc)->cluster, (i) = 0; \ @@ -472,11 +470,6 @@ page_is_ram(unsigned long pfn) return 0; } -#undef PFN_UP -#undef PFN_DOWN -#undef PFN_PHYS -#undef PFN_MAX - void __init setup_arch(char **cmdline_p) { diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 6d5251254f68..bf6b65c81bef 100644 --- a/arch/alpha/mm/numa.c +++ 
b/arch/alpha/mm/numa.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -27,9 +28,6 @@ bootmem_data_t node_bdata[MAX_NUMNODES]; #define DBGDCONT(args...) #endif -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) #define for_each_mem_cluster(memdesc, cluster, i) \ for ((cluster) = (memdesc)->cluster, (i) = 0; \ (i) < (memdesc)->numclusters; (i)++, (cluster)++) diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index e3ecaa453747..7da8a5205678 100644 --- a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -101,12 +102,6 @@ struct node_info { int bootmap_pages; }; -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT) -#define PFN_SIZE(x) ((x) >> PAGE_SHIFT) -#define PFN_RANGE(s,e) PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \ - (((unsigned long)(s)) & PAGE_MASK)) - /* * FIXME: We really want to avoid allocating the bootmap bitmap * over the top of the initrd. Hopefully, this is located towards diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c index 1ba57efff60d..619a6eefd893 100644 --- a/arch/cris/kernel/setup.c +++ b/arch/cris/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -88,10 +89,6 @@ setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - /* min_low_pfn points to the start of DRAM, start_pfn points * to the first DRAM pages after the kernel, and max_low_pfn * to the end of DRAM. diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6917daa159ab..8c08660b4e5d 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include