diff options
Diffstat (limited to 'include')
50 files changed, 934 insertions, 648 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..78feda9bbae2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -19,6 +19,7 @@ #include <linux/gfp.h> #include <linux/bsg.h> #include <linux/smp.h> +#include <linux/rcupdate.h> #include <asm/scatterlist.h> @@ -437,6 +438,7 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif + struct rcu_head rcu_head; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -974,7 +976,6 @@ struct blk_plug { unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head cb_list; /* md requires an unplug callback */ - unsigned int should_sort; /* list to be sorted before flushing? */ }; #define BLK_MAX_REQUEST_COUNT 16 diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7c2e030e72f1..0ea61e07a91c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,6 +12,7 @@ struct blk_trace { int trace_state; + bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 458f497738a4..5afc4f94d110 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -#define touch_buffer(bh) mark_page_accessed(bh->b_page) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten) void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); +void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index dad579b0c0e6..76554cecaab2 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -12,16 +12,46 @@ #define CEPH_FEATURE_MONNAMES (1<<5) #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) -/* bits 8-17 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_OBJECTLOCATOR (1<<8) +#define CEPH_FEATURE_PGID64 (1<<9) +#define CEPH_FEATURE_INCSUBOSDMAP (1<<10) +#define CEPH_FEATURE_PGPOOL3 (1<<11) +#define CEPH_FEATURE_OSDREPLYMUX (1<<12) +#define CEPH_FEATURE_OSDENC (1<<13) +#define CEPH_FEATURE_OMAP (1<<14) +#define CEPH_FEATURE_MONENC (1<<15) +#define CEPH_FEATURE_QUERY_T (1<<16) +#define CEPH_FEATURE_INDEP_PG_MAP (1<<17) #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) +#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19) +#define CEPH_FEATURE_MON_NULLROUTE (1<<20) +#define CEPH_FEATURE_MON_GV (1<<21) +#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22) +#define CEPH_FEATURE_MSG_AUTH (1<<23) +#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) +#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) +#define CEPH_FEATURE_CREATEPOOLID (1<<26) +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_OSD_HBMSGS (1<<28) +#define CEPH_FEATURE_MDSENC (1<<29) +#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) /* * Features supported. */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ + CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_OSDHASHPSPOOL) #define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC) #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a76..2ad7b860f062 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -21,16 +21,14 @@ * internal cluster protocols separately from the public, * client-facing protocol. */ -#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ -#define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ -#define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -51,7 +49,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_unused; /* unused; used to be preferred primary (-1) */ + __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -101,6 +99,8 @@ struct ceph_dir_layout { #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 #define CEPH_MSG_AUTH 17 #define CEPH_MSG_AUTH_REPLY 18 +#define CEPH_MSG_MON_GET_VERSION 19 +#define CEPH_MSG_MON_GET_VERSION_REPLY 20 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 @@ -221,6 +221,11 @@ struct ceph_mon_subscribe_ack { } __attribute__ ((packed)); /* + * mdsmap flags + */ +#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ + +/* * mds states * > 0 -> in * <= 0 -> out @@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ +#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed @@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); #define CEPH_LOCK_IXATTR 2048 #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ /* client_session ops */ enum { @@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_SIZE 32 #define CEPH_SETATTR_CTIME 64 +/* + * Ceph setxattr request flags. + */ +#define CEPH_XATTR_CREATE 1 +#define CEPH_XATTR_REPLACE 2 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ +#define CEPH_CAP_SIMPLE_BITS 2 +#define CEPH_CAP_FILE_BITS 8 + /* per-lock shift */ #define CEPH_CAP_SAUTH 2 #define CEPH_CAP_SLINK 4 #define CEPH_CAP_SXATTR 6 #define CEPH_CAP_SFILE 8 -#define CEPH_CAP_SFLOCK 20 +#define CEPH_CAP_SFLOCK 20 -#define CEPH_CAP_BITS 22 +#define CEPH_CAP_BITS 22 /* composed values */ #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 63d092822bad..360d9d08ca9e 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n) return end >= *p && n <= end - *p; } -#define ceph_decode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_decode_64_safe(p, end, v, bad) \ @@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n) * * There are two possible failures: * - converting the string would require accessing memory at or - * beyond the "end" pointer provided (-E - * - memory could not be allocated for the result + * beyond the "end" pointer provided (-ERANGE) + * - memory could not be allocated for the result (-ENOMEM) */ static inline char *ceph_extract_encoded_string(void **p, void *end, size_t *lenp, gfp_t gfp) @@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end, *p += len; } -#define ceph_encode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_encode_64_safe(p, end, v, bad) \ @@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end, #define ceph_encode_32_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u32), bad); \ - ceph_encode_32(p, v); \ + ceph_encode_32(p, v); \ } while (0) #define ceph_encode_16_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u16), bad); \ - ceph_encode_16(p, v); \ + ceph_encode_16(p, v); \ + } while (0) +#define ceph_encode_8_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u8), bad); \ + ceph_encode_8(p, v); \ } while (0) #define ceph_encode_copy_safe(p, end, pv, n, bad) \ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b12..29818fc3fa49 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) } /* ceph_common.c */ +extern bool libceph_compatible(void *data); + extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern struct kmem_cache *ceph_inode_cachep; @@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_get_direct_page_vector(const char __user *data, +extern struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, @@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, +extern void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, +extern void ceph_copy_from_page_vector(struct page **pages, + void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, +extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index cb15b5d867c7..87ed09f54800 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -29,8 +29,8 @@ struct ceph_mdsmap { /* which object pools file data can be stored in */ int m_num_data_pg_pools; - u32 *m_data_pg_pools; - u32 m_cas_pg_pool; + u64 *m_data_pg_pools; + u64 m_cas_pg_pool; }; static inline struct ceph_entity_addr * diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a9..60903e0f665c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -83,9 +83,11 @@ struct ceph_msg { struct list_head list_head; struct kref kref; +#ifdef CONFIG_BLOCK struct bio *bio; /* instead of pages/pagelist */ struct bio *bio_iter; /* bio iterator */ int bio_seg; /* current bio segment */ +#endif /* CONFIG_BLOCK */ struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e6..1dd5d466b6f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,7 @@ #include <linux/ceph/osdmap.h> #include <linux/ceph/messenger.h> #include <linux/ceph/auth.h> +#include <linux/ceph/pagelist.h> /* * Maximum object name size @@ -22,7 +23,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -47,6 +47,9 @@ struct ceph_osd { struct list_head o_keepalive_item; }; + +#define CEPH_OSD_MAX_OP 10 + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ @@ -63,9 +66,23 @@ struct ceph_osd_request { struct ceph_connection *r_con_filling_msg; struct ceph_msg *r_request, *r_reply; - int r_result; int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ + int r_num_ops; + + /* encoded message content */ + struct ceph_osd_op *r_request_ops; + /* these are updated on each send */ + __le32 *r_request_osdmap_epoch; + __le32 *r_request_flags; + __le64 *r_request_pool; + void *r_request_pgid; + __le32 *r_request_attempts; + struct ceph_eversion *r_request_reassert_version; + + int r_result; + int r_reply_op_len[CEPH_OSD_MAX_OP]; + s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; @@ -82,6 +99,7 @@ struct ceph_osd_request { char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ int r_oid_len; + u64 r_snapid; unsigned long r_stamp; /* send OR check time */ struct ceph_file_layout r_file_layout; @@ -95,7 +113,7 @@ struct ceph_osd_request { struct bio *r_bio; /* instead of pages */ #endif - struct ceph_pagelist *r_trail; /* trailing part of the data */ + struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { @@ -107,7 +125,6 @@ struct ceph_osd_event { struct rb_node node; struct list_head osd_node; struct kref kref; - struct completion completion; }; struct ceph_osd_event_work { @@ -157,7 +174,7 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ - u32 flags; /* CEPH_OSD_FLAG_* */ + u32 payload_len; union { struct { u64 offset, length; @@ -166,23 +183,24 @@ struct ceph_osd_req_op { } extent; struct { const char *name; - u32 name_len; const char *val; + u32 name_len; u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } xattr; struct { const char *class_name; - __u8 class_len; const char *method_name; - __u8 method_len; - __u8 argc; const char *indata; u32 indata_len; + __u8 class_len; + __u8 method_len; + __u8 argc; } cls; struct { - u64 cookie, count; + u64 cookie; + u64 count; } pgls; struct { u64 snapid; @@ -190,12 +208,11 @@ struct ceph_osd_req_op { struct { u64 cookie; u64 ver; - __u8 flag; u32 prot_ver; u32 timeout; + __u8 flag; } watch; }; - u32 payload_len; }; extern int ceph_osdc_init(struct ceph_osd_client *osdc, @@ -207,29 +224,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); + gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, + unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); + u64 snap_id, + struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, @@ -239,8 +246,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align); + bool use_mempool, int page_align); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -279,17 +285,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int nr_pages, - int flags, int do_sync, bool nofail); + struct page **pages, int nr_pages); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent); + void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern int ceph_osdc_wait_event(struct ceph_osd_event *event, - unsigned long timeout); extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 10a417f9f76f..c819190d1642 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -18,14 +18,31 @@ * The map can be updated either via an incremental map (diff) describing * the change between two successive epochs, or as a fully encoded map. */ +struct ceph_pg { + uint64_t pool; + uint32_t seed; +}; + +#define CEPH_POOL_FLAG_HASHPSPOOL 1 + struct ceph_pg_pool_info { struct rb_node node; - int id; - struct ceph_pg_pool v; - int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + s64 id; + u8 type; + u8 size; + u8 crush_ruleset; + u8 object_hash; + u32 pg_num, pgp_num; + int pg_num_mask, pgp_num_mask; + u64 flags; char *name; }; +struct ceph_object_locator { + uint64_t pool; + char *key; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -110,15 +127,16 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -extern int ceph_calc_object_layout(struct ceph_object_layout *ol, +extern int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, + struct ceph_pg pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..68c96a508ac2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -9,14 +9,6 @@ #include <linux/ceph/msgr.h> /* - * osdmap encoding versions - */ -#define CEPH_OSDMAP_INC_VERSION 5 -#define CEPH_OSDMAP_INC_VERSION_EXT 6 -#define CEPH_OSDMAP_VERSION 5 -#define CEPH_OSDMAP_VERSION_EXT 6 - -/* * fs id */ struct ceph_fsid { @@ -64,7 +56,7 @@ struct ceph_timespec { * placement group. * we encode this into one __le64. */ -struct ceph_pg { +struct ceph_pg_v1 { __le16 preferred; /* preferred primary osd */ __le16 ps; /* placement seed */ __le32 pool; /* object pool */ @@ -91,21 +83,6 @@ struct ceph_pg { #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 -#define CEPH_PG_POOL_VERSION 2 -struct ceph_pg_pool { - __u8 type; /* CEPH_PG_TYPE_* */ - __u8 size; /* number of osds in each pg */ - __u8 crush_ruleset; /* crush placement rule */ - __u8 object_hash; /* hash mapping object name to ps */ - __le32 pg_num, pgp_num; /* number of pg's */ - __le32 lpg_num, lpgp_num; /* number of localized pg's */ - __le32 last_change; /* most recent epoch changed */ - __le64 snap_seq; /* seq for per-pool snapshot */ - __le32 snap_epoch; /* epoch of last snap */ - __le32 num_snaps; - __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ - __le64 auid; /* who owns the pg */ -} __attribute__ ((packed)); /* * stable_mod func is used to control number of placement groups. @@ -128,7 +105,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) * object layout - how a given object should be stored. */ struct ceph_object_layout { - struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */ __le32 ol_stripe_unit; /* for per-object parity, if any */ } __attribute__ ((packed)); @@ -145,8 +122,12 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS 1 -#define CEPH_OSD_UP 2 +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ + +extern const char *ceph_osd_state_name(int s); /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 @@ -161,9 +142,25 @@ struct ceph_eversion { #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ + +/* + * The error code to return when an OSD can't handle a write + * because it is too large. + */ +#define OSD_WRITETOOBIG EMSGSIZE /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. */ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -177,6 +174,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 #define CEPH_OSD_OP_TYPE_PG 0x0500 +#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ enum { /** data **/ @@ -217,6 +215,23 @@ enum { CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /* omap */ + CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, + CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, + CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, + CEPH_OSD_OP_OMAPGETVALSBYKEYS = + CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, + CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, + CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, + CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, + CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, + CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + + /** multi **/ + CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, + CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, + CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -238,6 +253,7 @@ enum { CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, + CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -248,10 +264,12 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ + /* note: the RD bit here is wrong; see special-case below in helper */ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, + CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, }; static inline int ceph_osd_op_type_lock(int op) @@ -274,6 +292,10 @@ static inline int ceph_osd_op_type_pg(int op) { return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; } +static inline int ceph_osd_op_type_multi(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; +} static inline int ceph_osd_op_mode_subop(int op) { @@ -281,11 +303,12 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; + return op & CEPH_OSD_OP_MODE_WR; } /* @@ -294,34 +317,38 @@ static inline int ceph_osd_op_mode_modify(int op) */ #define CEPH_OSD_TMAP_HDR 'h' #define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' /* create key */ #define CEPH_OSD_TMAP_RM 'r' +#define CEPH_OSD_TMAP_RMSLOPPY 'R' extern const char *ceph_osd_op_name(int op); - /* * osd op flags * * An op may be READ, WRITE, or READ|WRITE. */ enum { - CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ - CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ - CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ - CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_READ = 16, /* op may read */ - CEPH_OSD_FLAG_WRITE = 32, /* op may write */ - CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ - CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ - CEPH_OSD_FLAG_BALANCE_READS = 256, - CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ - CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ - CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ - CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ + CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ + CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ + CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 0x0100, + CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ + CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ + CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ }; enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ + CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -381,48 +408,13 @@ struct ceph_osd_op { __le64 ver; __u8 flag; /* 0 = unwatch, 1 = watch */ } __attribute__ ((packed)) watch; -}; + struct { + __le64 offset, length; + __le64 src_offset; + } __attribute__ ((packed)) clonerange; + }; __le32 payload_len; } __attribute__ ((packed)); -/* - * osd request message header. each request may include multiple - * ceph_osd_op object operations. - */ -struct ceph_osd_request_head { - __le32 client_inc; /* client incarnation */ - struct ceph_object_layout layout; /* pgid */ - __le32 osdmap_epoch; /* client's osdmap epoch */ - - __le32 flags; - - struct ceph_timespec mtime; /* for mutations only */ - struct ceph_eversion reassert_version; /* if we are replaying op */ - - __le32 object_len; /* length of object name */ - - __le64 snapid; /* snapid to read */ - __le64 snap_seq; /* writer's snap context */ - __le32 num_snaps; - - __le16 num_ops; - struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ -} __attribute__ ((packed)); - -struct ceph_osd_reply_head { - __le32 client_inc; /* client incarnation */ - __le32 flags; - struct ceph_object_layout layout; - __le32 osdmap_epoch; - struct ceph_eversion reassert_version; /* for replaying uncommitted */ - - __le32 result; /* result code */ - - __le32 object_len; /* length of object name */ - __le32 num_ops; - struct ceph_osd_op ops[0]; /* ops[], object */ -} __attribute__ ((packed)); - - #endif diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b5548..33f0280fd533 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x) } extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff7..6a1101f24cfb 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -162,6 +162,8 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; + /* attempt chooseleaf inner descent once; on failure retry outer descent */ + __u32 chooseleaf_descend_once; }; diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3bd46f766751..a975de1ff59f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(struct task_struct *task); +extern void debug_check_no_locks_held(void); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(struct task_struct *task) +debug_check_no_locks_held(void) { } #endif diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 186620631750..acd0312d46fb 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -2,6 +2,7 @@ #define _LINUX_ELEVATOR_H #include <linux/percpu.h> +#include <linux/hashtable.h> #ifdef CONFIG_BLOCK @@ -96,6 +97,8 @@ struct elevator_type struct list_head list; }; +#define ELV_HASH_BITS 6 + /* * each queue has an elevator_queue associated with it */ @@ -105,8 +108,8 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; - struct hlist_head *hash; unsigned int registered:1; + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 3c3ef19a625a..cf5d2af61b81 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,7 +13,7 @@ #include <linux/wait.h> /* - * CAREFUL: Check include/asm-generic/fcntl.h when defining + * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want * to re-use O_* flags that couldn't possibly have a meaning * from eventfd, in order to leave a free define-space for diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e70df40d84f6..043a5cf8b5ba 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,6 +3,7 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED +#include <linux/debug_locks.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> @@ -48,6 +49,8 @@ extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { + if (!(current->flags & PF_NOFREEZE)) + debug_check_no_locks_held(); might_sleep(); if (likely(!freezing(current))) return false; diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 227c62424f3c..a9df51f5d54c 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -115,51 +115,50 @@ static inline void hash_del_rcu(struct hlist_node *node) * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry(obj, node, &name[bkt], member) +#define hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_rcu(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_rcu(obj, node, &name[bkt], member) +#define hash_for_each_rcu(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_safe(name, bkt, node, tmp, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], member) +#define hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible(name, obj, node, member, key) \ - hlist_for_each_entry(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the @@ -167,25 +166,24 @@ static inline void hash_del_rcu(struct hlist_node *node) * in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_rcu(name, obj, node, member, key) \ - hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible_rcu(name, obj, member, key) \ + hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ + member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_safe(name, obj, node, tmp, member, key) \ - hlist_for_each_entry_safe(obj, node, tmp, \ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) diff --git a/include/linux/idr.h b/include/linux/idr.h index e5eb125effe6..a6f38b5c34e4 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -17,69 +17,40 @@ #include <linux/init.h> #include <linux/rcupdate.h> -#if BITS_PER_LONG == 32 -# define IDR_BITS 5 -# define IDR_FULL 0xfffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (5 bits * 7 levels = 35 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 30) -#elif BITS_PER_LONG == 64 -# define IDR_BITS 6 -# define IDR_FULL 0xfffffffffffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (6 bits * 6 levels = 36 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 62) -#else -# error "BITS_PER_LONG is not 32 or 64" -#endif - +/* + * We want shallower trees and thus more bits covered at each layer. 8 + * bits gives us large enough first layer for most use cases and maximum + * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and + * 1k on 32bit. + */ +#define IDR_BITS 8 #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -#define MAX_IDR_SHIFT (sizeof(int)*8 - 1) -#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) -#define MAX_IDR_MASK (MAX_IDR_BIT - 1) - -/* Leave the possibility of an incomplete final layer */ -#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) - -/* Number of id_layer structs to leave in free list */ -#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) - struct idr_layer { - unsigned long bitmap; /* A zero bit means "space here" */ + int prefix; /* the ID prefix of this idr_layer */ + DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1<<IDR_BITS]; - int count; /* When zero, we can release it */ - int layer; /* distance from leaf */ - struct rcu_head rcu_head; + int count; /* When zero, we can release it */ + int layer; /* distance from leaf */ + struct rcu_head rcu_head; }; struct idr { - struct idr_layer __rcu *top; - struct idr_layer *id_free; - int layers; /* only valid without concurrent changes */ - int id_free_cnt; - spinlock_t lock; + struct idr_layer __rcu *hint; /* the last layer allocated from */ + struct idr_layer __rcu *top; + struct idr_layer *id_free; + int layers; /* only valid w/o concurrent changes */ + int id_free_cnt; + spinlock_t lock; }; -#define IDR_INIT(name) \ -{ \ - .top = NULL, \ - .id_free = NULL, \ - .layers = 0, \ - .id_free_cnt = 0, \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ +#define IDR_INIT(name) \ +{ \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) -/* Actions to be taken after a call to _idr_sub_alloc */ -#define IDR_NEED_TO_GROW -2 -#define IDR_NOMORE_SPACE -3 - -#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) - /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) @@ -101,19 +72,90 @@ struct idr { * This is what we export. */ -void *idr_find(struct idr *idp, int id); +void *idr_find_slowpath(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); +void idr_preload(gfp_t gfp_mask); +int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); -void idr_remove_all(struct idr *idp); +void idr_free(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +/** + * idr_preload_end - end preload section started with idr_preload() + * + * Each idr_preload() should be matched with an invocation of this + * function. See idr_preload() for details. + */ +static inline void idr_preload_end(void) +{ + preempt_enable(); +} + +/** + * idr_find - return pointer for given id + * @idp: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + */ +static inline void *idr_find(struct idr *idr, int id) +{ + struct idr_layer *hint = rcu_dereference_raw(idr->hint); + + if (hint && (id & ~IDR_MASK) == hint->prefix) + return rcu_dereference_raw(hint->ary[id & IDR_MASK]); + + return idr_find_slowpath(idr, id); +} + +/** + * idr_get_new - allocate new idr entry + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: pointer to the allocated handle + * + * Simple wrapper around idr_get_new_above() w/ @starting_id of zero. + */ +static inline int idr_get_new(struct idr *idp, void *ptr, int *id) +{ + return idr_get_new_above(idp, ptr, 0, id); +} + +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + +void __idr_remove_all(struct idr *idp); /* don't use */ + +/** + * idr_remove_all - remove all ids from the given idr tree + * @idp: idr handle + * + * If you're trying to destroy @idp, calling idr_destroy() is enough. + * This is going away. Don't use. + */ +static inline void __deprecated idr_remove_all(struct idr *idp) +{ + __idr_remove_all(idp); +} /* * IDA - IDR based id allocator, use when translation from id to @@ -141,7 +183,6 @@ struct ida { int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); -int ida_get_new(struct ida *ida, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); @@ -150,17 +191,18 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); -void __init idr_init_cache(void); - /** - * idr_for_each_entry - iterate over an idr's elements of a given type - * @idp: idr handle - * @entry: the type * to use as cursor - * @id: id entry's key + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Simple wrapper around ida_get_new_above() w/ @starting_id of zero. */ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ - entry != NULL; \ - ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) +static inline int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} + +void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 4648d8021244..cfd21e3d5506 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -216,11 +216,10 @@ static inline struct hlist_head *team_port_index_hash(struct team *team, static inline struct team_port *team_get_port_by_index(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry(port, p, head, hlist) + hlist_for_each_entry(port, head, hlist) if (port->index == port_index) return port; return NULL; @@ -228,11 +227,10 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline struct team_port *team_get_port_by_index_rcu(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry_rcu(port, p, head, hlist) + hlist_for_each_entry_rcu(port, head, hlist) if (port->index == port_index) return port; return NULL; diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1487e7906bbd..1f9f56e28851 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,10 +35,6 @@ #include <uapi/linux/ipmi.h> - -/* - * The in-kernel interface. - */ #include <linux/list.h> #include <linux/proc_fs.h> diff --git a/include/linux/list.h b/include/linux/list.h index cc6d2aa6b415..d991cc147c98 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -666,54 +666,49 @@ static inline void hlist_move_list(struct hlist_head *old, for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ pos = n) +#define hlist_entry_safe(ptr, type, member) \ + (ptr) ? hlist_entry(ptr, type, member) : NULL + /** * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_continue(pos, member) \ + for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_from(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @n: another &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) #endif diff --git a/include/linux/llist.h b/include/linux/llist.h index d0ab98f73d38..a5199f6d0e82 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -125,31 +125,6 @@ static inline void init_llist_head(struct llist_head *list) (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** - * llist_for_each_entry_safe - iterate safely against remove over some entries - * of lock-less list of given type. - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as a temporary storage. - * @node: the fist entry of deleted list entries. - * @member: the name of the llist_node with the struct. - * - * In general, some entries of the lock-less list can be traversed - * safely only after being removed from list, so start with an entry - * instead of list head. This variant allows removal of entries - * as we iterate. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. - */ -#define llist_for_each_entry_safe(pos, n, node, member) \ - for ((pos) = llist_entry((node), typeof(*(pos)), member), \ - (n) = (pos)->member.next; \ - &(pos)->member != NULL; \ - (pos) = llist_entry(n, typeof(*(pos)), member), \ - (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) - -/** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0e62d84f9f7f..dcaad79f54ed 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -212,7 +212,8 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); /* diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 2a32b16f79cb..786bf6679a28 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -16,6 +16,7 @@ #include <linux/gpio.h> #include <linux/irqdomain.h> +#include <linux/pwm.h> #include <linux/regmap.h> #define LP8788_DEV_BUCK "lp8788-buck" @@ -124,11 +125,6 @@ enum lp8788_bl_ramp_step { LP8788_RAMP_65538us, }; -enum lp8788_bl_pwm_polarity { - LP8788_PWM_ACTIVE_HIGH, - LP8788_PWM_ACTIVE_LOW, -}; - enum lp8788_isink_scale { LP8788_ISINK_SCALE_100mA, LP8788_ISINK_SCALE_120mA, @@ -229,16 +225,6 @@ struct lp8788_charger_platform_data { }; /* - * struct lp8788_bl_pwm_data - * @pwm_set_intensity : set duty of pwm - * @pwm_get_intensity : get current duty of pwm - */ -struct lp8788_bl_pwm_data { - void (*pwm_set_intensity) (int brightness, int max_brightness); - int (*pwm_get_intensity) (int max_brightness); -}; - -/* * struct lp8788_backlight_platform_data * @name : backlight driver name. (default: "lcd-backlight") * @initial_brightness : initial value of backlight brightness @@ -248,8 +234,8 @@ struct lp8788_bl_pwm_data { * @rise_time : brightness ramp up step time * @fall_time : brightness ramp down step time * @pwm_pol : pwm polarity setting when bl_mode is pwm based - * @pwm_data : platform specific pwm generation functions - * only valid when bl_mode is pwm based + * @period_ns : platform specific pwm period value. unit is nano. + Only valid when bl_mode is LP8788_BL_COMB_PWM_BASED */ struct lp8788_backlight_platform_data { char *name; @@ -259,8 +245,8 @@ struct lp8788_backlight_platform_data { enum lp8788_bl_full_scale_current full_scale; enum lp8788_bl_ramp_step rise_time; enum lp8788_bl_ramp_step fall_time; - enum lp8788_bl_pwm_polarity pwm_pol; - struct lp8788_bl_pwm_data pwm_data; + enum pwm_polarity pwm_pol; + unsigned int period_ns; }; /* diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index fed3def62818..779cf7c4a3d1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -33,8 +33,7 @@ struct ieee1394_device_id { __u32 model_id; __u32 specifier_id; __u32 version; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; @@ -148,8 +147,7 @@ struct hid_device_id { __u16 group; __u32 vendor; __u32 product; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; /* s390 CCW devices */ @@ -173,8 +171,6 @@ struct ccw_device_id { struct ap_device_id { __u16 match_flags; /* which fields to match against */ __u8 dev_type; /* device type */ - __u8 pad1; - __u32 pad2; kernel_ulong_t driver_info; }; @@ -184,13 +180,10 @@ struct ap_device_id { struct css_device_id { __u8 match_flags; __u8 type; /* subchannel type */ - __u16 pad2; - __u32 pad3; kernel_ulong_t driver_data; }; -#define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ - /* to workaround crosscompile issues */ +#define ACPI_ID_LEN 9 struct acpi_device_id { __u8 id[ACPI_ID_LEN]; @@ -231,11 +224,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#ifdef __KERNEL__ const void *data; -#else - kernel_ulong_t data; -#endif }; /* VIO */ @@ -260,24 +249,14 @@ struct pcmcia_device_id { /* for pseudo multi-function devices */ __u8 device_no; - __u32 prod_id_hash[4] - __attribute__((aligned(sizeof(__u32)))); + __u32 prod_id_hash[4]; /* not matched against in kernelspace*/ -#ifdef __KERNEL__ const char * prod_id[4]; -#else - kernel_ulong_t prod_id[4] - __attribute__((aligned(sizeof(kernel_ulong_t)))); -#endif /* not matched against */ kernel_ulong_t driver_info; -#ifdef __KERNEL__ char * cisfile; -#else - kernel_ulong_t cisfile; -#endif }; #define PCMCIA_DEV_ID_MATCH_MANF_ID 0x0001 @@ -373,8 +352,7 @@ struct sdio_device_id { __u8 class; /* Standard interface or SDIO_ANY_ID */ __u16 vendor; /* Vendor or SDIO_ANY_ID */ __u16 device; /* Device ID or SDIO_ANY_ID */ - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* SSB core, see drivers/ssb/ */ @@ -420,8 +398,7 @@ struct virtio_device_id { */ struct hv_vmbus_device_id { __u8 guid[16]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* rpmsg */ @@ -440,8 +417,7 @@ struct rpmsg_device_id { struct i2c_device_id { char name[I2C_NAME_SIZE]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* spi */ @@ -451,8 +427,7 @@ struct i2c_device_id { struct spi_device_id { char name[SPI_NAME_SIZE]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* dmi */ @@ -484,15 +459,6 @@ struct dmi_strmatch { char substr[79]; }; -#ifndef __KERNEL__ -struct dmi_system_id { - kernel_ulong_t callback; - kernel_ulong_t ident; - struct dmi_strmatch matches[4]; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); -}; -#else struct dmi_system_id { int (*callback)(const struct dmi_system_id *); const char *ident; @@ -506,7 +472,6 @@ struct dmi_system_id { * error: storage size of '__mod_dmi_device_table' isn't known */ #define dmi_device_id dmi_system_id -#endif #define DMI_MATCH(a, b) { a, b } @@ -515,8 +480,7 @@ struct dmi_system_id { struct platform_device_id { char name[PLATFORM_NAME_SIZE]; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; #define MDIO_MODULE_PREFIX "mdio:" @@ -572,11 +536,7 @@ struct isapnp_device_id { struct amba_id { unsigned int id; unsigned int mask; -#ifndef __KERNEL__ - kernel_ulong_t data; -#else void *data; -#endif }; /* diff --git a/include/linux/pid.h b/include/linux/pid.h index 2381c973d897..a089a3c447fc 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -176,9 +176,8 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ - struct hlist_node *pos___; \ if ((pid) != NULL) \ - hlist_for_each_entry_rcu((task), pos___, \ + hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pids[type].node) { /* diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c92dd28eaa6c..8089e35d47ac 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -445,8 +445,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, /** * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -454,16 +453,16 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(hlist_next_rcu(pos))) +#define hlist_for_each_entry_rcu(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -471,35 +470,36 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ - for (pos = rcu_dereference_bh((head)->first); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_rcu_bh(pos, head, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ - for (pos = rcu_dereference((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(pos->next)) +#define hlist_for_each_entry_continue_rcu(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ - for (pos = rcu_dereference_bh((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_continue_rcu_bh(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4bd6c06eb28e..2d8bdaef9611 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -231,6 +231,41 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) +/* + * sg page iterator + * + * Iterates over sg entries page-by-page. On each successful iteration, + * @piter->page points to the current page, @piter->sg to the sg holding this + * page and @piter->sg_pgoffset to the page's page offset within the sg. The + * iteration will stop either when a maximum number of sg entries was reached + * or a terminating sg (sg_last(sg) == true) was reached. + */ +struct sg_page_iter { + struct page *page; /* current page */ + struct scatterlist *sg; /* sg holding the page */ + unsigned int sg_pgoffset; /* page offset within the sg */ + + /* these are internal states, keep away */ + unsigned int __nents; /* remaining sg entries */ + int __pg_advance; /* nr pages to advance at the + * next step */ +}; + +bool __sg_page_iter_next(struct sg_page_iter *piter); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset); + +/** + * for_each_sg_page - iterate over the pages of the given sg list + * @sglist: sglist to iterate over + * @piter: page iterator to hold current page, sg, sg_pgoffset + * @nents: maximum number of sg entries to iterate over + * @pgoffset: starting page offset + */ +#define for_each_sg_page(sglist, piter, nents, pgoffset) \ + for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ + __sg_page_iter_next(piter);) /* * Mapping sg iterator @@ -258,11 +293,11 @@ struct sg_mapping_iter { void *addr; /* pointer to the mapped area */ size_t length; /* length of the mapped area */ size_t consumed; /* number of consumed bytes */ + struct sg_page_iter piter; /* page iterator */ /* these are internal states, keep away */ - struct scatterlist *__sg; /* current entry */ - unsigned int __nents; /* nr of remaining entries */ - unsigned int __offset; /* offset within sg */ + unsigned int __offset; /* offset within page */ + unsigned int __remaining; /* remaining bytes on page */ unsigned int __flags; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 6853bf947fde..d35d2b6ddbfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,11 +346,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -/* get/set_dumpable() values */ -#define SUID_DUMPABLE_DISABLED 0 -#define SUID_DUMPABLE_ENABLED 1 -#define SUID_DUMPABLE_SAFE 2 - /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h new file mode 100644 index 000000000000..07d8e53bedfc --- /dev/null +++ b/include/linux/sunrpc/addr.h @@ -0,0 +1,170 @@ +/* + * linux/include/linux/sunrpc/addr.h + * + * Various routines for copying and comparing sockaddrs and for + * converting them to and from presentation format. + */ +#ifndef _LINUX_SUNRPC_ADDR_H +#define _LINUX_SUNRPC_ADDR_H + +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <net/ipv6.h> + +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return false; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) + return sin1->sin6_scope_id == sin2->sin6_scope_id; + + return true; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; + return true; +} +#else /* !(IS_ENABLED(CONFIG_IPV6) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} +#endif /* !(IS_ENABLED(CONFIG_IPV6) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + +#endif /* _LINUX_SUNRPC_ADDR_H */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5dc9ee4d616e..303399b1ba59 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -83,6 +83,10 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + int (*cache_parse)(struct cache_detail *, char *buf, int len); @@ -157,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); extern void cache_clean_deferred(void *owner); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b84d8da..4a4abde000cb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -165,157 +165,5 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); -size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); - -static inline unsigned short rpc_get_port(const struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)sap)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - } - return 0; -} - -static inline void rpc_set_port(struct sockaddr *sap, - const unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - break; - } -} - -#define IPV6_SCOPE_DELIMITER '%' -#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") - -static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -static inline bool __rpc_copy_addr4(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in *ssin = (struct sockaddr_in *) src; - struct sockaddr_in *dsin = (struct sockaddr_in *) dst; - - dsin->sin_family = ssin->sin_family; - dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; - return true; -} - -#if IS_ENABLED(CONFIG_IPV6) -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - - if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) - return false; - else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) - return sin1->sin6_scope_id == sin2->sin6_scope_id; - - return true; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; - struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; - - dsin6->sin6_family = ssin6->sin6_family; - dsin6->sin6_addr = ssin6->sin6_addr; - return true; -} -#else /* !(IS_ENABLED(CONFIG_IPV6) */ -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return false; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - return false; -} -#endif /* !(IS_ENABLED(CONFIG_IPV6) */ - -/** - * rpc_cmp_addr - compare the address portion of two sockaddrs. - * @sap1: first sockaddr - * @sap2: second sockaddr - * - * Just compares the family and address portion. Ignores port, scope, etc. - * Returns true if the addrs are equal, false if they aren't. - */ -static inline bool rpc_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __rpc_cmp_addr4(sap1, sap2); - case AF_INET6: - return __rpc_cmp_addr6(sap1, sap2); - } - } - return false; -} - -/** - * rpc_copy_addr - copy the address portion of one sockaddr to another - * @dst: destination sockaddr - * @src: source sockaddr - * - * Just copies the address portion and family. Ignores port, scope, etc. - * Caller is responsible for making certain that dst is large enough to hold - * the address in src. Returns true if address family is supported. Returns - * false otherwise. - */ -static inline bool rpc_copy_addr(struct sockaddr *dst, - const struct sockaddr *src) -{ - switch (src->sa_family) { - case AF_INET: - return __rpc_copy_addr4(dst, src); - case AF_INET6: - return __rpc_copy_addr6(dst, src); - } - return false; -} - -/** - * rpc_get_scope_id - return scopeid for a given sockaddr - * @sa: sockaddr to get scopeid from - * - * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if - * not an AF_INET6 address. - */ -static inline u32 rpc_get_scope_id(const struct sockaddr *sa) -{ - if (sa->sa_family != AF_INET6) - return 0; - - return ((struct sockaddr_in6 *) sa)->sin6_scope_id; -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 676ddf53b3ee..1f0216b9a6c9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,6 +50,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct svc_pool_stats sp_stats; /* statistics on pool operation */ + int sp_task_pending;/* has pending task */ } ____cacheline_aligned_in_smp; /* diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 63988990bd36..15f9204ee70b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -56,7 +56,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ - struct page ** pages; /* Array of contiguous pages */ + struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ @@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b82a83aba311..9a9367c0c076 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -87,9 +87,9 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason); -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr, - enum wb_reason reason); +int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); diff --git a/include/net/ax25.h b/include/net/ax25.h index 53539acbd81a..89ed9ac5701f 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -161,8 +161,8 @@ typedef struct ax25_uid_assoc { ax25_address call; } ax25_uid_assoc; -#define ax25_uid_for_each(__ax25, node, list) \ - hlist_for_each_entry(__ax25, node, list, uid_node) +#define ax25_uid_for_each(__ax25, list) \ + hlist_for_each_entry(__ax25, list, uid_node) #define ax25_uid_hold(ax25) \ atomic_inc(&((ax25)->refcount)) @@ -247,8 +247,8 @@ typedef struct ax25_cb { #define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo) -#define ax25_for_each(__ax25, node, list) \ - hlist_for_each_entry(__ax25, node, list, ax25_node) +#define ax25_for_each(__ax25, list) \ + hlist_for_each_entry(__ax25, list, ax25_node) #define ax25_cb_hold(__ax25) \ atomic_inc(&((__ax25)->refcount)) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 7b2ae9d37076..ef83d9e844b5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -94,8 +94,8 @@ static inline struct net *ib_net(struct inet_bind_bucket *ib) return read_pnet(&ib->ib_net); } -#define inet_bind_bucket_for_each(tb, pos, head) \ - hlist_for_each_entry(tb, pos, head, node) +#define inet_bind_bucket_for_each(tb, head) \ + hlist_for_each_entry(tb, head, node) struct inet_bind_hashbucket { spinlock_t lock; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 7d658d577368..f908dfc06505 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -178,11 +178,11 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) #define inet_twsk_for_each(tw, node, head) \ hlist_nulls_for_each_entry(tw, node, head, tw_node) -#define inet_twsk_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) +#define inet_twsk_for_each_inmate(tw, jail) \ + hlist_for_each_entry(tw, jail, tw_death_node) -#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) +#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \ + hlist_for_each_entry_safe(tw, safe, jail, tw_death_node) static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { diff --git a/include/net/netrom.h b/include/net/netrom.h index f0793c1cb5f8..121dcf854db5 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -154,17 +154,17 @@ static __inline__ void nr_node_unlock(struct nr_node *nr_node) nr_node_put(nr_node); } -#define nr_neigh_for_each(__nr_neigh, node, list) \ - hlist_for_each_entry(__nr_neigh, node, list, neigh_node) +#define nr_neigh_for_each(__nr_neigh, list) \ + hlist_for_each_entry(__nr_neigh, list, neigh_node) -#define nr_neigh_for_each_safe(__nr_neigh, node, node2, list) \ - hlist_for_each_entry_safe(__nr_neigh, node, node2, list, neigh_node) +#define nr_neigh_for_each_safe(__nr_neigh, node2, list) \ + hlist_for_each_entry_safe(__nr_neigh, node2, list, neigh_node) -#define nr_node_for_each(__nr_node, node, list) \ - hlist_for_each_entry(__nr_node, node, list, node_node) +#define nr_node_for_each(__nr_node, list) \ + hlist_for_each_entry(__nr_node, list, node_node) -#define nr_node_for_each_safe(__nr_node, node, node2, list) \ - hlist_for_each_entry_safe(__nr_node, node, node2, list, node_node) +#define nr_node_for_each_safe(__nr_node, node2, list) \ + hlist_for_each_entry_safe(__nr_node, node2, list, node_node) /*********************************************************************/ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2761c905504e..f10818fc8804 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -339,11 +339,10 @@ static inline struct Qdisc_class_common * qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) { struct Qdisc_class_common *cl; - struct hlist_node *n; unsigned int h; h = qdisc_class_hash(id, hash->hashmask); - hlist_for_each_entry(cl, n, &hash->hash[h], hnode) { + hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) return cl; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 7fdf298a47ef..df85a0c0f2d5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -675,8 +675,8 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) return h & (sctp_assoc_hashsize - 1); } -#define sctp_for_each_hentry(epb, node, head) \ - hlist_for_each_entry(epb, node, head, node) +#define sctp_for_each_hentry(epb, head) \ + hlist_for_each_entry(epb, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sock.h b/include/net/sock.h index a66caa223d18..14f6e9d19dc7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -606,24 +606,23 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } -#define sk_for_each(__sk, node, list) \ - hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu(__sk, node, list) \ - hlist_for_each_entry_rcu(__sk, node, list, sk_node) +#define sk_for_each(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_node) +#define sk_for_each_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, sk_node) #define sk_nulls_for_each(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) #define sk_nulls_for_each_rcu(__sk, node, list) \ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) -#define sk_for_each_from(__sk, node) \ - if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ - hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_for_each_from(__sk) \ + hlist_for_each_entry_from(__sk, sk_node) #define sk_nulls_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) -#define sk_for_each_safe(__sk, node, tmp, list) \ - hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) -#define sk_for_each_bound(__sk, node, list) \ - hlist_for_each_entry(__sk, node, list, sk_bind_node) +#define sk_for_each_safe(__sk, tmp, list) \ + hlist_for_each_entry_safe(__sk, tmp, list, sk_node) +#define sk_for_each_bound(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind_node) static inline struct user_namespace *sk_user_ns(struct sock *sk) { diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 05c5e61f0a7c..9961726523d0 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -6,10 +6,61 @@ #include <linux/blktrace_api.h> #include <linux/blkdev.h> +#include <linux/buffer_head.h> #include <linux/tracepoint.h> #define RWBS_LEN 8 +DECLARE_EVENT_CLASS(block_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh), + + TP_STRUCT__entry ( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( size_t, size ) + ), + + TP_fast_assign( + __entry->dev = bh->b_bdev->bd_dev; + __entry->sector = bh->b_blocknr; + __entry->size = bh->b_size; + ), + + TP_printk("%d,%d sector=%llu size=%zu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->sector, __entry->size + ) +); + +/** + * block_touch_buffer - mark a buffer accessed + * @bh: buffer_head being touched + * + * Called from touch_buffer(). + */ +DEFINE_EVENT(block_buffer, block_touch_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + +/** + * block_dirty_buffer - mark a buffer dirty + * @bh: buffer_head being dirtied + * + * Called from mark_buffer_dirty(). + */ +DEFINE_EVENT(block_buffer, block_dirty_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -206,7 +257,6 @@ TRACE_EVENT(block_bio_bounce, /** * block_bio_complete - completed all work on the block operation - * @q: queue holding the block operation * @bio: block operation completed * @error: io error value * @@ -215,9 +265,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio, int error), + TP_PROTO(struct bio *bio, int error), - TP_ARGS(q, bio, error), + TP_ARGS(bio, error), TP_STRUCT__entry( __field( dev_t, dev ) @@ -228,7 +278,8 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio->bi_bdev ? + bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; __entry->error = error; @@ -241,11 +292,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio, +DECLARE_EVENT_CLASS(block_bio_merge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, rq, bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -272,31 +323,33 @@ DECLARE_EVENT_CLASS(block_bio, /** * block_bio_backmerge - merging block operation to the end of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request * in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_backmerge, +DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block * operation in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_frontmerge, +DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** @@ -306,11 +359,32 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge, * * About to place the block IO operation @bio into queue @q. */ -DEFINE_EVENT(block_bio, block_bio_queue, +TRACE_EVENT(block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, RWBS_LEN ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); DECLARE_EVENT_CLASS(block_get_rq, diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b453d92c2253..6a16fd2e70ed 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -32,6 +32,115 @@ struct wb_writeback_work; +TRACE_EVENT(writeback_dirty_page, + + TP_PROTO(struct page *page, struct address_space *mapping), + + TP_ARGS(page, mapping), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(pgoff_t, index) + ), + + TP_fast_assign( + strncpy(__entry->name, + mapping ? dev_name(mapping->backing_dev_info->dev) : "(unknown)", 32); + __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->index = page->index; + ), + + TP_printk("bdi %s: ino=%lu index=%lu", + __entry->name, + __entry->ino, + __entry->index + ) +); + +DECLARE_EVENT_CLASS(writeback_dirty_inode_template, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(unsigned long, flags) + ), + + TP_fast_assign( + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + /* may be called for files on pseudo FSes w/ unregistered bdi */ + strncpy(__entry->name, + bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + __entry->ino = inode->i_ino; + __entry->flags = flags; + ), + + TP_printk("bdi %s: ino=%lu flags=%s", + __entry->name, + __entry->ino, + show_inode_state(__entry->flags) + ) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DECLARE_EVENT_CLASS(writeback_write_inode_template, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(int, sync_mode) + ), + + TP_fast_assign( + strncpy(__entry->name, + dev_name(inode->i_mapping->backing_dev_info->dev), 32); + __entry->ino = inode->i_ino; + __entry->sync_mode = wbc->sync_mode; + ), + + TP_printk("bdi %s: ino=%lu sync_mode=%d", + __entry->name, + __entry->ino, + __entry->sync_mode + ) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), TP_ARGS(bdi, work), @@ -479,6 +588,13 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ) ); +DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write), + TP_ARGS(inode, wbc, nr_to_write) +); + DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 33fbc99b3812..7b26a62e5707 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -59,15 +59,7 @@ * if it becomes full and it is queried once a second to see if * anything is in it. Incoming commands to the driver will get * delivered as commands. - * - * This driver provides two main interfaces: one for in-kernel - * applications and another for userland applications. The - * capabilities are basically the same for both interface, although - * the interfaces are somewhat different. The stuff in the - * #ifdef __KERNEL__ below is the in-kernel interface. The userland - * interface is defined later in the file. */ - - + */ /* * This is an overlay for all the address types, so it's easy to diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index 996719f82e28..f055e58b3147 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -87,6 +87,8 @@ #define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) +#define FAT_STATE_DIRTY 0x01 + struct __fat_dirent { long d_ino; __kernel_off_t d_off; @@ -120,14 +122,34 @@ struct fat_boot_sector { __le32 hidden; /* hidden sectors (unused) */ __le32 total_sect; /* number of sectors (if sectors == 0) */ - /* The following fields are only used by FAT32 */ - __le32 fat32_length; /* sectors/FAT */ - __le16 flags; /* bit 8: fat mirroring, low 4: active fat */ - __u8 version[2]; /* major, minor filesystem version */ - __le32 root_cluster; /* first cluster in root directory */ - __le16 info_sector; /* filesystem info sector */ - __le16 backup_boot; /* backup boot sector */ - __le16 reserved2[6]; /* Unused */ + union { + struct { + /* Extended BPB Fields for FAT16 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat16; + + struct { + /* only used by FAT32 */ + __le32 length; /* sectors/FAT */ + __le16 flags; /* bit 8: fat mirroring, + low 4: active fat */ + __u8 version[2]; /* major, minor filesystem + version */ + __le32 root_cluster; /* first cluster in + root directory */ + __le16 info_sector; /* filesystem info sector */ + __le16 backup_boot; /* backup boot sector */ + __le16 reserved2[6]; /* Unused */ + /* Extended BPB Fields for FAT32 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat32; + }; }; struct fat_boot_fsinfo { diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index dfb514472cbc..4f52549b23ff 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -33,13 +33,14 @@ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, - /* there is a gap here to match userspace */ + NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 }; /* values for flags field */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 26607bd965fa..e4629b93bdd6 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -15,19 +15,22 @@ /* Namespaces */ #define XATTR_OS2_PREFIX "os2." -#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) +#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1) + +#define XATTR_MAC_OSX_PREFIX "osx." +#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1) #define XATTR_SECURITY_PREFIX "security." -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) +#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) #define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) +#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1) #define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) +#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) #define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) +#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) /* Security namespace */ #define XATTR_EVM_SUFFIX "evm" |