diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-30 08:52:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-30 08:52:42 -0700 |
commit | 0d167518e045cc8bb63f0a8a0a85ad4fa4e0044f (patch) | |
tree | 101a9b5d425d79f663e4f25f1e90b7a8cc6604f1 /include | |
parent | 2f83766d4b18774c856329a8fca4c9338dfeda39 (diff) | |
parent | ff26eaadf4d914e397872b99885d45756104e9ae (diff) | |
download | lwn-0d167518e045cc8bb63f0a8a0a85ad4fa4e0044f.tar.gz lwn-0d167518e045cc8bb63f0a8a0a85ad4fa4e0044f.zip |
Merge branch 'for-3.5/core' of git://git.kernel.dk/linux-block
Merge block/IO core bits from Jens Axboe:
"This is a bit bigger on the core side than usual, but that is purely
because we decided to hold off on parts of Tejun's submission on 3.4
to give it a bit more time to simmer. As a consequence, it's seen a
long cycle in for-next.
It contains:
- Bug fix from Dan, wrong locking type.
- Relax splice gifting restriction from Eric.
- A ton of updates from Tejun, primarily for blkcg. This improves
the code a lot, making the API nicer and cleaner, and also includes
fixes for how we handle and tie policies and re-activate on
switches. The changes also include generic bug fixes.
- A simple fix from Vivek, along with a fix for doing proper delayed
allocation of the blkcg stats."
Fix up annoying conflict just due to different merge resolution in
Documentation/feature-removal-schedule.txt
* 'for-3.5/core' of git://git.kernel.dk/linux-block: (92 commits)
blkcg: tg_stats_alloc_lock is an irq lock
vmsplice: relax alignement requirements for SPLICE_F_GIFT
blkcg: use radix tree to index blkgs from blkcg
blkcg: fix blkcg->css ref leak in __blkg_lookup_create()
block: fix elvpriv allocation failure handling
block: collapse blk_alloc_request() into get_request()
blkcg: collapse blkcg_policy_ops into blkcg_policy
blkcg: embed struct blkg_policy_data in policy specific data
blkcg: mass rename of blkcg API
blkcg: style cleanups for blk-cgroup.h
blkcg: remove blkio_group->path[]
blkcg: blkg_rwstat_read() was missing inline
blkcg: shoot down blkgs if all policies are deactivated
blkcg: drop stuff unused after per-queue policy activation update
blkcg: implement per-queue policy activation
blkcg: add request_queue->root_blkg
blkcg: make request_queue bypassing on allocation
blkcg: make sure blkg_lookup() returns %NULL if @q is bypassing
blkcg: make blkg_conf_prep() take @pol and return with queue lock held
blkcg: remove static policy ID enums
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bio.h | 8 | ||||
-rw-r--r-- | include/linux/blk_types.h | 10 | ||||
-rw-r--r-- | include/linux/blkdev.h | 20 | ||||
-rw-r--r-- | include/linux/elevator.h | 8 | ||||
-rw-r--r-- | include/linux/iocontext.h | 39 | ||||
-rw-r--r-- | include/linux/ioprio.h | 22 |
6 files changed, 69 insertions, 38 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h index 4d94eb8bcbcc..26435890dc87 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -269,6 +269,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); +#ifdef CONFIG_BLK_CGROUP +int bio_associate_current(struct bio *bio); +void bio_disassociate_task(struct bio *bio); +#else /* CONFIG_BLK_CGROUP */ +static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } +static inline void bio_disassociate_task(struct bio *bio) { } +#endif /* CONFIG_BLK_CGROUP */ + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4053cbd4490e..0edb65dd8edd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -14,6 +14,8 @@ struct bio; struct bio_integrity_payload; struct page; struct block_device; +struct io_context; +struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *, int); typedef void (bio_destructor_t) (struct bio *); @@ -66,6 +68,14 @@ struct bio { bio_end_io_t *bi_end_io; void *bi_private; +#ifdef CONFIG_BLK_CGROUP + /* + * Optional ioc and css associated with this bio. Put on bio + * release. Read comment on top of bio_associate_current(). + */ + struct io_context *bi_ioc; + struct cgroup_subsys_state *bi_css; +#endif #if defined(CONFIG_BLK_DEV_INTEGRITY) struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d4ac24a263e..ba43f408baa3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -32,10 +32,17 @@ struct blk_trace; struct request; struct sg_io_hdr; struct bsg_job; +struct blkcg_gq; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ +/* + * Maximum number of blkcg policies allowed to be registered concurrently. + * Defined here to simplify include dependency. + */ +#define BLKCG_MAX_POLS 2 + struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -363,6 +370,11 @@ struct request_queue { struct list_head timeout_list; struct list_head icq_list; +#ifdef CONFIG_BLK_CGROUP + DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); + struct blkcg_gq *root_blkg; + struct list_head blkg_list; +#endif struct queue_limits limits; @@ -390,12 +402,17 @@ struct request_queue { struct mutex sysfs_lock; + int bypass_depth; + #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; int bsg_job_size; struct bsg_class_device bsg_dev; #endif +#ifdef CONFIG_BLK_CGROUP + struct list_head all_q_node; +#endif #ifdef CONFIG_BLK_DEV_THROTTLING /* Throttle data */ struct throtl_data *td; @@ -407,7 +424,7 @@ struct request_queue { #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ -#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */ @@ -491,6 +508,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) +#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 7d4e0356f329..c03af7687bb4 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -28,12 +28,13 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int); typedef void (elevator_init_icq_fn) (struct io_cq *); typedef void (elevator_exit_icq_fn) (struct io_cq *); -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t); +typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, + struct bio *, gfp_t); typedef void (elevator_put_req_fn) (struct request *); typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); -typedef void *(elevator_init_fn) (struct request_queue *); +typedef int (elevator_init_fn) (struct request_queue *); typedef void (elevator_exit_fn) (struct elevator_queue *); struct elevator_ops @@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, int); extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); -extern int elv_set_request(struct request_queue *, struct request *, gfp_t); +extern int elv_set_request(struct request_queue *q, struct request *rq, + struct bio *bio, gfp_t gfp_mask); extern void elv_put_request(struct request_queue *, struct request *); extern void elv_drain_elevator(struct request_queue *); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 1a3018063034..df38db2ef45b 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -6,11 +6,7 @@ #include <linux/workqueue.h> enum { - ICQ_IOPRIO_CHANGED = 1 << 0, - ICQ_CGROUP_CHANGED = 1 << 1, ICQ_EXITED = 1 << 2, - - ICQ_CHANGED_MASK = ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED, }; /* @@ -100,6 +96,7 @@ struct io_cq { */ struct io_context { atomic_long_t refcount; + atomic_t active_ref; atomic_t nr_tasks; /* all the fields below are protected by this lock */ @@ -120,29 +117,37 @@ struct io_context { struct work_struct release_work; }; -static inline struct io_context *ioc_task_link(struct io_context *ioc) +/** + * get_io_context_active - get active reference on ioc + * @ioc: ioc of interest + * + * Only iocs with active reference can issue new IOs. This function + * acquires an active reference on @ioc. The caller must already have an + * active reference on @ioc. + */ +static inline void get_io_context_active(struct io_context *ioc) { - /* - * if ref count is zero, don't allow sharing (ioc is going away, it's - * a race). - */ - if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) { - atomic_inc(&ioc->nr_tasks); - return ioc; - } + WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0); + WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0); + atomic_long_inc(&ioc->refcount); + atomic_inc(&ioc->active_ref); +} + +static inline void ioc_task_link(struct io_context *ioc) +{ + get_io_context_active(ioc); - return NULL; + WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0); + atomic_inc(&ioc->nr_tasks); } struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); +void put_io_context_active(struct io_context *ioc); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); -void ioc_ioprio_changed(struct io_context *ioc, int ioprio); -void ioc_cgroup_changed(struct io_context *ioc); -unsigned int icq_get_changed(struct io_cq *icq); #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 76dad4808847..beb9ce1c2c23 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -42,26 +42,14 @@ enum { }; /* - * if process has set io priority explicitly, use that. if not, convert - * the cpu scheduler nice value to an io priority + * Fallback BE priority */ #define IOPRIO_NORM (4) -static inline int task_ioprio(struct io_context *ioc) -{ - if (ioprio_valid(ioc->ioprio)) - return IOPRIO_PRIO_DATA(ioc->ioprio); - - return IOPRIO_NORM; -} - -static inline int task_ioprio_class(struct io_context *ioc) -{ - if (ioprio_valid(ioc->ioprio)) - return IOPRIO_PRIO_CLASS(ioc->ioprio); - - return IOPRIO_CLASS_BE; -} +/* + * if process has set io priority explicitly, use that. if not, convert + * the cpu scheduler nice value to an io priority + */ static inline int task_nice_ioprio(struct task_struct *task) { return (task_nice(task) + 20) / 5; |