block/blk-flush.c +5 −0

@@ -252,6 +252,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
+bool is_flush_rq(struct request *rq)
+{
+	return rq->end_io == flush_end_io;
+}
+
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
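One observation on this helper: the flush request is allocated per hw queue outside the normal tag pool, so dropping the last iterator reference must complete it via ->end_io rather than return it through __blk_mq_free_request(); comparing rq->end_io against the file-local flush_end_io is the one property that identifies it (which is why blk_mq_put_rq_ref() in blk-mq.c below special-cases it). A tiny compilable userspace sketch of the "identify by completion callback" trick (toy types, not kernel API):

#include <stdio.h>

struct req {
	void (*end_io)(struct req *r, int status);
};

static void flush_end_io(struct req *r, int status)
{
	(void)r;
	printf("flush completion: %d\n", status);
}

static void normal_end_io(struct req *r, int status)
{
	(void)r;
	printf("normal completion: %d\n", status);
}

/* Identify the special request by its completion callback,
 * the same comparison is_flush_rq() performs. */
static int is_flush(const struct req *r)
{
	return r->end_io == flush_end_io;
}

int main(void)
{
	struct req a = { .end_io = flush_end_io };
	struct req b = { .end_io = normal_end_io };

	printf("a is flush: %d, b is flush: %d\n", is_flush(&a), is_flush(&b));
	return 0;
}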
block/blk-mq-tag.c +41 −11

@@ -212,6 +212,22 @@ struct bt_iter_data {
 	bool reserved;
 };
 
+static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
+		unsigned int bitnr)
+{
+	struct request *rq;
+	unsigned long flags;
+	struct ext_blk_mq_tags *etags;
+
+	etags = container_of(tags, struct ext_blk_mq_tags, tags);
+	spin_lock_irqsave(&etags->lock, flags);
+	rq = tags->rqs[bitnr];
+	if (!rq || !refcount_inc_not_zero(&rq->ref))
+		rq = NULL;
+	spin_unlock_irqrestore(&etags->lock, flags);
+	return rq;
+}
+
 static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
 	struct bt_iter_data *iter_data = data;
@@ -219,18 +235,23 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_tags *tags = hctx->tags;
 	bool reserved = iter_data->reserved;
 	struct request *rq;
+	bool ret = true;
 
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
-	rq = tags->rqs[bitnr];
 
 	/*
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assigning ->rqs[].
 	 */
-	if (rq && rq->q == hctx->queue)
-		return iter_data->fn(hctx, rq, iter_data->data, reserved);
+	rq = blk_mq_find_and_get_req(tags, bitnr);
+	if (!rq)
+		return true;
+
+	if (rq->q == hctx->queue && rq->mq_hctx == hctx)
+		ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
+	blk_mq_put_rq_ref(rq);
+	return ret;
 }
 
 /**
@@ -273,6 +294,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_tags *tags = iter_data->tags;
 	bool reserved = iter_data->reserved;
 	struct request *rq;
+	bool ret = true;
 
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
@@ -281,11 +303,13 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assigning ->rqs[].
 	 */
-	rq = tags->rqs[bitnr];
-	if (rq && blk_mq_request_started(rq))
-		return iter_data->fn(rq, iter_data->data, reserved);
+	rq = blk_mq_find_and_get_req(tags, bitnr);
+	if (!rq)
+		return true;
 
+	if (blk_mq_request_started(rq))
+		ret = iter_data->fn(rq, iter_data->data, reserved);
+	blk_mq_put_rq_ref(rq);
+	return ret;
 }
 
 /**
@@ -342,6 +366,9 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
  *		indicates whether or not @rq is a reserved request. Return
  *		true to continue iterating tags, false to stop.
  * @priv:	Will be passed as second argument to @fn.
+ *
+ * We grab one request reference before calling @fn and release it after
+ * @fn returns.
  */
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 		busy_tag_iter_fn *fn, void *priv)
@@ -465,18 +492,21 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 			  int node, int alloc_policy)
 {
 	struct blk_mq_tags *tags;
+	struct ext_blk_mq_tags *etags;
 
 	if (total_tags > BLK_MQ_TAG_MAX) {
 		pr_err("blk-mq: tag depth too large\n");
 		return NULL;
 	}
 
-	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
-	if (!tags)
+	etags = kzalloc_node(sizeof(*etags), GFP_KERNEL, node);
+	if (!etags)
 		return NULL;
 
+	tags = &etags->tags;
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
+	spin_lock_init(&etags->lock);
 
 	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
 }
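The core of the fix is the "look up, then try to take a reference" pattern: blk_mq_find_and_get_req() re-reads tags->rqs[bitnr] under etags->lock and takes a reference only if the count is still non-zero, so an iterator can never revive a request whose last reference has already been dropped. A minimal, compilable userspace sketch of the same idiom (toy names; C11 atomics and a pthread mutex standing in for refcount_t and the spinlock, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	atomic_int ref;
};

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *slot;	/* analogue of tags->rqs[bitnr] */

/* Mirror of refcount_inc_not_zero(): take a reference only while the
 * count is still non-zero; fail once it has dropped to zero. */
static bool ref_inc_not_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	while (old != 0) {
		if (atomic_compare_exchange_weak(ref, &old, old + 1))
			return true;
	}
	return false;
}

/* Analogue of blk_mq_find_and_get_req(): re-read the slot under the
 * lock and only hand it out with a reference held. */
static struct obj *find_and_get(void)
{
	struct obj *o;

	pthread_mutex_lock(&slot_lock);
	o = slot;
	if (o && !ref_inc_not_zero(&o->ref))
		o = NULL;
	pthread_mutex_unlock(&slot_lock);
	return o;
}

int main(void)
{
	struct obj live = { .ref = 1 };

	slot = &live;
	printf("live lookup: %s\n", find_and_get() ? "got ref" : "miss");

	atomic_store(&live.ref, 0);	/* last reference gone */
	printf("dead lookup: %s\n", find_and_get() ? "got ref" : "miss");
	return 0;
}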
block/blk-mq-tag.h +15 −0

@@ -21,6 +21,21 @@ struct blk_mq_tags {
 	struct list_head page_list;
 };
 
+/*
+ * Extended tag address space map. This was needed
+ * to add a spinlock to blk_mq_tags in a KMI compliant
+ * way (no changes could be made to struct blk_mq_tags).
+ */
+struct ext_blk_mq_tags {
+	struct blk_mq_tags tags;
+
+	/*
+	 * used to clear request reference in rqs[] before freeing one
+	 * request pool
+	 */
+	spinlock_t lock;
+};
+
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
 					unsigned int reserved_tags,
 					int node, int alloc_policy);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
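The KMI trick here is worth spelling out: struct blk_mq_tags cannot grow, so the new lock lives in a wrapper that embeds the frozen struct, and container_of() recovers the wrapper from any pointer to the embedded member. Every existing user keeps passing struct blk_mq_tags * around unchanged. A small compilable sketch of the idiom (toy structs, with container_of reduced to its offsetof form):

#include <stdio.h>
#include <stddef.h>

/* container_of as defined (minus the typeof sanity check) in the kernel */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct tags {			/* frozen ABI/KMI struct: must not change */
	unsigned int nr_tags;
};

struct ext_tags {		/* private extension wrapping it */
	struct tags tags;	/* embedded member handed out to old code */
	int lock;		/* stand-in for the new spinlock_t */
};

int main(void)
{
	struct ext_tags e = { .tags = { .nr_tags = 128 }, .lock = 0 };
	struct tags *t = &e.tags;	/* what the existing API sees */

	/* recover the wrapper from the embedded struct */
	struct ext_tags *back = container_of(t, struct ext_tags, tags);

	printf("nr_tags=%u lock=%d\n", back->tags.nr_tags, back->lock);
	return 0;
}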
block/blk-mq.c +88 −11

@@ -886,6 +886,14 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 	return false;
 }
 
+void blk_mq_put_rq_ref(struct request *rq)
+{
+	if (is_flush_rq(rq))
+		rq->end_io(rq, 0);
+	else if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
+}
+
 static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
@@ -919,11 +927,7 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
 
-	if (is_flush_rq(rq, hctx))
-		rq->end_io(rq, 0);
-	else if (refcount_dec_and_test(&rq->ref))
-		__blk_mq_free_request(rq);
-
+	blk_mq_put_rq_ref(rq);
 	return true;
 }
 
@@ -1971,6 +1975,47 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 	}
 }
 
+static size_t order_to_size(unsigned int order)
+{
+	return (size_t)PAGE_SIZE << order;
+}
+
+/* called before freeing request pool in @tags */
+static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set,
+		struct blk_mq_tags *tags, unsigned int hctx_idx)
+{
+	struct blk_mq_tags *drv_tags = set->tags[hctx_idx];
+	struct ext_blk_mq_tags *drv_etags;
+	struct page *page;
+	unsigned long flags;
+
+	list_for_each_entry(page, &tags->page_list, lru) {
+		unsigned long start = (unsigned long)page_address(page);
+		unsigned long end = start + order_to_size(page->private);
+		int i;
+
+		for (i = 0; i < set->queue_depth; i++) {
+			struct request *rq = drv_tags->rqs[i];
+			unsigned long rq_addr = (unsigned long)rq;
+
+			if (rq_addr >= start && rq_addr < end) {
+				WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
+				cmpxchg(&drv_tags->rqs[i], rq, NULL);
+			}
+		}
+	}
+
+	/*
+	 * Wait until all pending iteration is done.
+	 *
+	 * Request reference is cleared and it is guaranteed to be observed
+	 * after the ->lock is released.
+	 */
+	drv_etags = container_of(drv_tags, struct ext_blk_mq_tags, tags);
+	spin_lock_irqsave(&drv_etags->lock, flags);
+	spin_unlock_irqrestore(&drv_etags->lock, flags);
+}
+
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
@@ -2096,6 +2141,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		}
 	}
 
+	blk_mq_clear_rq_mapping(set, tags, hctx_idx);
+
 	while (!list_empty(&tags->page_list)) {
 		page = list_first_entry(&tags->page_list, struct page, lru);
 		list_del_init(&page->lru);
@@ -2155,11 +2202,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	return tags;
 }
 
-static size_t order_to_size(unsigned int order)
-{
-	return (size_t)PAGE_SIZE << order;
-}
-
 static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			       unsigned int hctx_idx, int node)
 {
@@ -2292,16 +2334,51 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 			&hctx->cpuhp_dead);
 }
 
+/*
+ * Before freeing hw queue, clearing the flush request reference in
+ * tags->rqs[] for avoiding potential UAF.
+ */
+static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
+		unsigned int queue_depth, struct request *flush_rq)
+{
+	int i;
+	unsigned long flags;
+	struct ext_blk_mq_tags *etags;
+
+	/* The hw queue may not be mapped yet */
+	if (!tags)
+		return;
+
+	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+
+	for (i = 0; i < queue_depth; i++)
+		cmpxchg(&tags->rqs[i], flush_rq, NULL);
+
+	/*
+	 * Wait until all pending iteration is done.
+	 *
+	 * Request reference is cleared and it is guaranteed to be observed
+	 * after the ->lock is released.
+	 */
+	etags = container_of(tags, struct ext_blk_mq_tags, tags);
+	spin_lock_irqsave(&etags->lock, flags);
+	spin_unlock_irqrestore(&etags->lock, flags);
+}
+
 /* hctx->ctxs will be freed in queue's release handler */
 static void blk_mq_exit_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct request *flush_rq = hctx->fq->flush_rq;
+
 	if (blk_mq_hw_queue_mapped(hctx))
 		blk_mq_tag_idle(hctx);
 
+	blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+			set->queue_depth, flush_rq);
+
 	if (set->ops->exit_request)
-		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
+		set->ops->exit_request(set, flush_rq, hctx_idx);
 
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
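A note on the apparently empty spin_lock_irqsave()/spin_unlock_irqrestore() pairs in blk_mq_clear_rq_mapping() and blk_mq_clear_flush_rq_mapping(): they are not dead code. Every lookup in blk_mq_find_and_get_req() runs under etags->lock, so acquiring and immediately releasing that lock after the cmpxchg() clears guarantees that any iterator which already loaded a stale rqs[] pointer has finished with it, and that later lookups observe the NULLed slot, before the request memory is freed. A runnable userspace sketch of this "empty critical section as a drain" idiom (toy names, pthread mutex in place of the spinlock):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(int *) slot;	/* analogue of tags->rqs[i] */

/* Reader side: every lookup (and dereference) happens under the lock,
 * as in blk_mq_find_and_get_req(). */
static int read_slot(void)
{
	int v = 0;
	int *p;

	pthread_mutex_lock(&lock);
	p = atomic_load(&slot);
	if (p)
		v = *p;
	pthread_mutex_unlock(&lock);
	return v;
}

/* Free side: clear the pointer, then drain readers with an empty
 * critical section before the memory behind it is reused. */
static void clear_and_drain(void)
{
	atomic_store(&slot, NULL);	/* kernel code uses cmpxchg() here */

	pthread_mutex_lock(&lock);	/* wait out any reader that */
	pthread_mutex_unlock(&lock);	/* captured the old pointer */
}

int main(void)
{
	int v = 42;

	atomic_store(&slot, &v);
	printf("before clear: %d\n", read_slot());
	clear_and_drain();		/* now safe to free/reuse v */
	printf("after clear: %d\n", read_slot());
	return 0;
}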
block/blk-mq.h +1 −0

@@ -47,6 +47,7 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx,
 			struct list_head *list);
 bool blk_mq_get_driver_tag(struct request *rq);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_ctx *start);
+void blk_mq_put_rq_ref(struct request *rq);
 
 /*
  * Internal helpers for allocating/freeing the request map