Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6118b70b authored by Jens Axboe
Browse files

cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue()



Set up an emergency fallback cfqq that we allocate at IO scheduler init
time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
never fails without having to ensure free memory.

On cfqq lookup, always try to allocate a new cfqq if the given cfq io
context has the oom_cfqq assigned. This ensures that we only temporarily
punt to this shared queue.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent d5036d77
Loading
Loading
Loading
Loading
+73 −64
Original line number Original line Diff line number Diff line
@@ -70,6 +70,51 @@ struct cfq_rb_root {
};
};
#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }


/*
 * Per process-grouping structure
 *
 * The full definition must be visible before struct cfq_data, because
 * cfq_data embeds one instance by value (the oom_cfqq fallback queue)
 * rather than holding a pointer to it.
 */
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* various state flags, see below */
	unsigned int flags;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* service_tree member */
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
	/* prio tree member */
	struct rb_node p_node;
	/* prio tree root we belong to, if any */
	struct rb_root *p_root;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/*
	 * requests queued in sort_list
	 * NOTE(review): two-element array, presumably indexed by the
	 * async/sync direction of the request -- confirm against users.
	 */
	int queued[2];
	/*
	 * currently allocated requests
	 * NOTE(review): presumably indexed the same way as queued[] -- confirm.
	 */
	int allocated[2];
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	/*
	 * Time-slice bookkeeping.
	 * NOTE(review): exact semantics are not visible in this chunk;
	 * by name these look like the slice expiry time, the residual
	 * slice carried over, and the number of requests dispatched in
	 * the current slice -- confirm against the slice handling code.
	 */
	unsigned long slice_end;
	long slice_resid;
	unsigned int slice_dispatch;

	/* pending metadata requests */
	int meta_pending;
	/* number of requests that are on the dispatch list or inside driver */
	int dispatched;

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	/*
	 * NOTE(review): presumably the pid handed to cfq_init_cfqq() when
	 * the queue is set up -- confirm in cfq_init_cfqq().
	 */
	pid_t pid;
};

/*
/*
 * Per block device queue structure
 * Per block device queue structure
 */
 */
@@ -135,51 +180,11 @@ struct cfq_data {
	unsigned int cfq_slice_idle;
	unsigned int cfq_slice_idle;


	struct list_head cic_list;
	struct list_head cic_list;
};


	/*
	/*
 * Per process-grouping structure
	 * Fallback dummy cfqq for extreme OOM conditions
	 */
	 */
struct cfq_queue {
	struct cfq_queue oom_cfqq;
	/* reference count */
	atomic_t ref;
	/* various state flags, see below */
	unsigned int flags;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* service_tree member */
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
	/* prio tree member */
	struct rb_node p_node;
	/* prio tree root we belong to, if any */
	struct rb_root *p_root;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	unsigned long slice_end;
	long slice_resid;
	unsigned int slice_dispatch;

	/* pending metadata requests */
	int meta_pending;
	/* number of requests that are on the dispatch list or inside driver */
	int dispatched;

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	pid_t pid;
};
};


enum cfqq_state_flags {
enum cfqq_state_flags {
@@ -1673,41 +1678,40 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
	/* cic always exists here */
	/* cic always exists here */
	cfqq = cic_to_cfqq(cic, is_sync);
	cfqq = cic_to_cfqq(cic, is_sync);


	if (!cfqq) {
	/*
	 * Always try a new alloc if we fell back to the OOM cfqq
	 * originally, since it should just be a temporary situation.
	 */
	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
		cfqq = NULL;
		if (new_cfqq) {
		if (new_cfqq) {
			cfqq = new_cfqq;
			cfqq = new_cfqq;
			new_cfqq = NULL;
			new_cfqq = NULL;
		} else if (gfp_mask & __GFP_WAIT) {
		} else if (gfp_mask & __GFP_WAIT) {
			/*
			 * Inform the allocator of the fact that we will
			 * just repeat this allocation if it fails, to allow
			 * the allocator to do whatever it needs to attempt to
			 * free memory.
			 */
			spin_unlock_irq(cfqd->queue->queue_lock);
			spin_unlock_irq(cfqd->queue->queue_lock);
			new_cfqq = kmem_cache_alloc_node(cfq_pool,
			new_cfqq = kmem_cache_alloc_node(cfq_pool,
					gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
					gfp_mask | __GFP_ZERO,
					cfqd->queue->node);
					cfqd->queue->node);
			spin_lock_irq(cfqd->queue->queue_lock);
			spin_lock_irq(cfqd->queue->queue_lock);
			if (new_cfqq)
				goto retry;
				goto retry;
		} else {
		} else {
			cfqq = kmem_cache_alloc_node(cfq_pool,
			cfqq = kmem_cache_alloc_node(cfq_pool,
					gfp_mask | __GFP_ZERO,
					gfp_mask | __GFP_ZERO,
					cfqd->queue->node);
					cfqd->queue->node);
			if (!cfqq)
				goto out;
		}
		}


		if (cfqq) {
			cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
			cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
			cfq_init_prio_data(cfqq, ioc);
			cfq_init_prio_data(cfqq, ioc);
			cfq_log_cfqq(cfqd, cfqq, "alloced");
			cfq_log_cfqq(cfqd, cfqq, "alloced");
		} else
			cfqq = &cfqd->oom_cfqq;
	}
	}


	if (new_cfqq)
	if (new_cfqq)
		kmem_cache_free(cfq_pool, new_cfqq);
		kmem_cache_free(cfq_pool, new_cfqq);


out:
	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
	return cfqq;
	return cfqq;
}
}


@@ -1740,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
		cfqq = *async_cfqq;
		cfqq = *async_cfqq;
	}
	}


	if (!cfqq) {
		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
	if (!cfqq)
	if (!cfqq)
			return NULL;
		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
	}


	/*
	/*
	 * pin the queue now that it's allocated, scheduler exit will prune it
	 * pin the queue now that it's allocated, scheduler exit will prune it
@@ -2470,6 +2471,14 @@ static void *cfq_init_queue(struct request_queue *q)
	for (i = 0; i < CFQ_PRIO_LISTS; i++)
	for (i = 0; i < CFQ_PRIO_LISTS; i++)
		cfqd->prio_trees[i] = RB_ROOT;
		cfqd->prio_trees[i] = RB_ROOT;


	/*
	 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
	 * Grab a permanent reference to it, so that the normal code flow
	 * will not attempt to free it.
	 */
	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
	atomic_inc(&cfqd->oom_cfqq.ref);

	INIT_LIST_HEAD(&cfqd->cic_list);
	INIT_LIST_HEAD(&cfqd->cic_list);


	cfqd->queue = q;
	cfqd->queue = q;