
Commit 841710aa authored by Tejun Heo, committed by Jens Axboe

writeback: implement memcg wb_domain



Dirtyable memory is distributed to a wb (bdi_writeback) according to
the relative bandwidth the wb is writing out in the whole system.
This distribution is global - each wb is measured against all other
wb's and gets a proportionately sized portion of the memory in the
whole system.
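
To make the proportion concrete, here is a toy model (illustrative
names only; the kernel actually tracks completions with decaying
fprop_local_percpu counters rather than raw totals):

	/* toy sketch: a wb's share of dirtyable memory scales with its
	 * share of writeout completions within the domain */
	static unsigned long toy_wb_share(unsigned long dirtyable,
					  unsigned long wb_completions,
					  unsigned long dom_completions)
	{
		if (!dom_completions)
			return dirtyable;	/* no history yet: no scaling */
		return dirtyable * wb_completions / dom_completions;
	}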

For cgroup writeback, the amount of dirtyable memory is scoped by
memcg and thus each wb would need to be measured and controlled in its
memcg.  IOW, a wb will belong to two writeback domains - the global
and memcg domains.
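
Schematically, a wb therefore carries one completion counter per
domain it belongs to (abridged view; the new field is the one added
by the backing-dev-defs.h hunk below):

	struct bdi_writeback {
		/* ... */
		struct fprop_local_percpu completions;	/* vs. global_wb_domain */
	#ifdef CONFIG_CGROUP_WRITEBACK
		/* ... */
		struct fprop_local_percpu memcg_completions; /* vs. memcg cgwb_domain */
	#endif
		/* ... */
	};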

The previous patches laid the groundwork to support the two wb_domains
and this patch implements memcg wb_domain.  memcg->cgwb_domain is
initialized on css online and destroyed on css release,
wb->memcg_completions is added, and __wb_writeout_inc() is updated to
increment completions against both global and memcg wb_domains.

The following patches will update balance_dirty_pages() and its
subroutines to actually consider memcg wb_domain for throttling.
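
For orientation, the __wb_writeout_inc() update mentioned above
amounts to feeding each completed writeout into both domains; a
sketch, assuming the wb_domain_writeout_inc() helper introduced
earlier in this series:

	static void __wb_writeout_inc(struct bdi_writeback *wb)
	{
		struct wb_domain *cgdom;

		__inc_wb_stat(wb, WB_WRITTEN);

		/* credit the global domain... */
		wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
				       wb->bdi->max_prop_frac);

		/* ...and the memcg domain, if this wb has one (NULL for root) */
		cgdom = mem_cgroup_wb_domain(wb);
		if (cgdom)
			wb_domain_writeout_inc(cgdom, &wb->memcg_completions,
					       wb->bdi->max_prop_frac);
	}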

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 947e9762
include/linux/backing-dev-defs.h  +1 −0
@@ -118,6 +118,7 @@ struct bdi_writeback {
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct percpu_ref refcnt;	/* used only for !root wb's */
+	struct fprop_local_percpu memcg_completions;
 	struct cgroup_subsys_state *memcg_css; /* the associated memcg */
 	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
include/linux/memcontrol.h  +11 −1
@@ -389,8 +389,18 @@ enum {
 };
 
 #ifdef CONFIG_CGROUP_WRITEBACK
+
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
-#endif
+struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+{
+	return NULL;
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
include/linux/writeback.h  +3 −0
@@ -167,6 +167,9 @@ static inline void laptop_sync_completion(void) { }
 void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
+#ifdef CONFIG_CGROUP_WRITEBACK
+void wb_domain_exit(struct wb_domain *dom);
+#endif
 
 extern struct wb_domain global_wb_domain;
mm/backing-dev.c  +8 −1
@@ -482,6 +482,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 	css_put(wb->blkcg_css);
 	wb_congested_put(wb->congested);
 
+	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
 	kfree_rcu(wb, rcu);
@@ -548,9 +549,13 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	if (ret)
 		goto err_wb_exit;
 
+	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
+	if (ret)
+		goto err_ref_exit;
+
 	wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
 	if (!wb->congested)
-		goto err_ref_exit;
+		goto err_fprop_exit;
 
 	wb->memcg_css = memcg_css;
 	wb->blkcg_css = blkcg_css;
@@ -587,6 +592,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
 err_put_congested:
 	wb_congested_put(wb->congested);
+err_fprop_exit:
+	fprop_local_destroy_percpu(&wb->memcg_completions);
 err_ref_exit:
 	percpu_ref_exit(&wb->refcnt);
 err_wb_exit:
mm/memcontrol.c  +39 −0
@@ -345,6 +345,7 @@ struct mem_cgroup {
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head cgwb_list;
+	struct wb_domain cgwb_domain;
 #endif
 
 	/* List of events which userspace want to receive */
@@ -3994,6 +3995,37 @@ struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg)
 	return &memcg->cgwb_list;
 }
 
+static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
+{
+	return wb_domain_init(&memcg->cgwb_domain, gfp);
+}
+
+static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
+{
+	wb_domain_exit(&memcg->cgwb_domain);
+}
+
+struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+
+	if (!memcg->css.parent)
+		return NULL;
+
+	return &memcg->cgwb_domain;
+}
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
+{
+	return 0;
+}
+
+static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*
@@ -4380,9 +4412,15 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
 	if (!memcg->stat)
 		goto out_free;
+
+	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
+		goto out_free_stat;
+
 	spin_lock_init(&memcg->pcp_counter_lock);
 	return memcg;
 
+out_free_stat:
+	free_percpu(memcg->stat);
 out_free:
 	kfree(memcg);
 	return NULL;
@@ -4409,6 +4447,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 		free_mem_cgroup_per_zone_info(memcg, node);
 
 	free_percpu(memcg->stat);
+	memcg_wb_domain_exit(memcg);
 	kfree(memcg);
 }
