Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 55484c45 authored by Jack Steiner's avatar Jack Steiner Committed by Linus Torvalds
Browse files

gru: allow users to specify gru chiplet 2



Add support to the GRU driver to allow users to specify the blade &
chiplet for allocation of GRU contexts.  Add new statistics for context
loading/unloading/retargeting.  Also deleted a few GRU stats that were no
longer being used.

Signed-off-by: default avatarJack Steiner <steiner@sgi.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 518e5cd4
Loading
Loading
Loading
Loading
+2 −11
Original line number Diff line number Diff line
@@ -546,17 +546,7 @@ int gru_handle_user_call_os(unsigned long cb)
	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
		goto exit;

	/*
	 * If force_unload is set, the UPM TLB fault is phony. The task
	 * has migrated to another node and the GSEG must be moved. Just
	 * unload the context. The task will page fault and assign a new
	 * context.
	 */
	if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 &&
				gts->ts_blade != uv_numa_blade_id()) {
		STAT(call_os_offnode_reference);
		gts->ts_force_unload = 1;
	}
	gru_check_context_placement(gts);

	/*
	 * CCH may contain stale data if ts_force_cch_reload is set.
@@ -771,6 +761,7 @@ int gru_set_context_option(unsigned long arg)
		} else {
			gts->ts_user_blade_id = req.val1;
			gts->ts_user_chiplet_id = req.val0;
			gru_check_context_placement(gts);
		}
		break;
	case sco_gseg_owner:
+8 −7
Original line number Diff line number Diff line
@@ -232,23 +232,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
 * system.
 */
static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
			     void *vaddr, int nid, int bid, int grunum)
			     void *vaddr, int blade_id, int chiplet_id)
{
	spin_lock_init(&gru->gs_lock);
	spin_lock_init(&gru->gs_asid_lock);
	gru->gs_gru_base_paddr = paddr;
	gru->gs_gru_base_vaddr = vaddr;
	gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
	gru->gs_blade = gru_base[bid];
	gru->gs_blade_id = bid;
	gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
	gru->gs_blade = gru_base[blade_id];
	gru->gs_blade_id = blade_id;
	gru->gs_chiplet_id = chiplet_id;
	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
	gru->gs_asid_limit = MAX_ASID;
	gru_tgh_flush_init(gru);
	if (gru->gs_gid >= gru_max_gids)
		gru_max_gids = gru->gs_gid + 1;
	gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n",
		bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
	gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
		blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
		gru->gs_gru_base_paddr);
}

@@ -283,7 +284,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
				chip++, gru++) {
			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
			gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip);
			gru_init_chiplet(gru, paddr, vaddr, bid, chip);
			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
			cbrs = max(cbrs, n);
			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+5 −3
Original line number Diff line number Diff line
@@ -160,8 +160,10 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts)
	if (!bs->bs_kgts) {
		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
@@ -172,9 +174,9 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts, blade_id)) {
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts, blade_id);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
+86 −42
Original line number Diff line number Diff line
@@ -684,6 +684,40 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
	return gru_update_cch(gts, 0);
}

/*
 * Unload the gru context if it is not assigned to the correct blade or
 * chiplet. Misassignment can occur if the process migrates to a different
 * blade or if the user changes the selected blade/chiplet.
 * The context is unloaded here (not reloaded); the task will page fault
 * and be assigned a new, correctly-placed context.
 */
void gru_check_context_placement(struct gru_thread_state *gts)
{
	struct gru_state *gru;
	int blade_id, chiplet_id;

	/*
	 * If the current task is the context owner, verify that the
	 * context is correctly placed. This test is skipped for non-owner
	 * references. Pthread apps use non-owner references to the CBRs.
	 */
	gru = gts->ts_gru;
	if (!gru || gts->ts_tgid_owner != current->tgid)
		return;

	/* User-selected blade wins; otherwise use the blade we run on now */
	blade_id = gts->ts_user_blade_id;
	if (blade_id < 0)
		blade_id = uv_numa_blade_id();

	/* chiplet_id < 0 means "any chiplet", so only check when selected */
	chiplet_id = gts->ts_user_chiplet_id;
	if (gru->gs_blade_id != blade_id ||
	    (chiplet_id >= 0 && chiplet_id != gru->gs_chiplet_id)) {
		STAT(check_context_unload);
		gru_unload_context(gts, 1);
	} else if (gru_retarget_intr(gts)) {
		STAT(check_context_retarget_intr);
	}
}


/*
 * Insufficient GRU resources available on the local blade. Steal a context from
@@ -714,13 +748,17 @@ static void gts_stolen(struct gru_thread_state *gts,
	}
}

void gru_steal_context(struct gru_thread_state *gts, int blade_id)
void gru_steal_context(struct gru_thread_state *gts)
{
	struct gru_blade_state *blade;
	struct gru_state *gru, *gru0;
	struct gru_thread_state *ngts = NULL;
	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
	int blade_id = gts->ts_user_blade_id;
	int chiplet_id = gts->ts_user_chiplet_id;

	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
	cbr = gts->ts_cbr_au_count;
	dsr = gts->ts_dsr_au_count;

@@ -731,9 +769,12 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id)
	gru = blade->bs_lru_gru;
	if (ctxnum == 0)
		gru = next_gru(blade, gru);
	blade->bs_lru_gru = gru;
	blade->bs_lru_ctxnum = ctxnum;
	ctxnum0 = ctxnum;
	gru0 = gru;
	while (1) {
		if (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id) {
			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
				break;
			spin_lock(&gru->gs_lock);
@@ -750,16 +791,17 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id)
				if (ngts && is_gts_stealable(ngts, blade))
					break;
				ngts = NULL;
			flag = 1;
			}
			spin_unlock(&gru->gs_lock);
			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
				break;
		}
		if (flag && gru == gru0)
			break;
		flag = 1;
		ctxnum = 0;
		gru = next_gru(blade, gru);
	}
	blade->bs_lru_gru = gru;
	blade->bs_lru_ctxnum = ctxnum;
	spin_unlock(&blade->bs_lock);

	if (ngts) {
@@ -777,20 +819,36 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id)
		hweight64(gru->gs_dsr_map));
}

/*
 * Allocate a free context number (CCH index) on a GRU chiplet and mark
 * it in use in the chiplet's context map.
 *
 * NOTE(review): called with gru->gs_lock held — the caller releases
 * gs_lock only after the returned ctxnum has been recorded; confirm all
 * call sites hold the lock.
 *
 * The pre-refactor inline code BUG_ON'd when the map was full; keep that
 * invariant check here: if find_first_zero_bit() finds no free bit it
 * returns GRU_NUM_CCH, and __set_bit() on that index would write past
 * the bitmap. Callers are expected to have verified availability via
 * check_gru_resources() first.
 */
static int gru_assign_context_number(struct gru_state *gru)
{
	int ctxnum;

	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
	BUG_ON(ctxnum == GRU_NUM_CCH);
	__set_bit(ctxnum, &gru->gs_context_map);
	return ctxnum;
}

/*
 * Scan the GRUs on the local blade & assign a GRU context.
 */
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
						int blade)
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru, *grux;
	int i, max_active_contexts;
	int blade_id = gts->ts_user_blade_id;
	int chiplet_id = gts->ts_user_chiplet_id;


	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
again:
	gru = NULL;
	max_active_contexts = GRU_NUM_CCH;
	for_each_gru_on_blade(grux, blade, i) {
	for_each_gru_on_blade(grux, blade_id, i) {
		if (chiplet_id >= 0 && chiplet_id != grux->gs_chiplet_id)
			continue;
		if (check_gru_resources(grux, gts->ts_cbr_au_count,
					gts->ts_dsr_au_count,
					max_active_contexts)) {
@@ -811,12 +869,9 @@ struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
		reserve_gru_resources(gru, gts);
		gts->ts_gru = gru;
		gts->ts_blade = gru->gs_blade_id;
		gts->ts_ctxnum =
		    find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
		BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
		gts->ts_ctxnum = gru_assign_context_number(gru);
		atomic_inc(&gts->ts_refcnt);
		gru->gs_gts[gts->ts_ctxnum] = gts;
		__set_bit(gts->ts_ctxnum, &gru->gs_context_map);
		spin_unlock(&gru->gs_lock);

		STAT(assign_context);
@@ -844,7 +899,6 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct gru_thread_state *gts;
	unsigned long paddr, vaddr;
	int blade_id;

	vaddr = (unsigned long)vmf->virtual_address;
	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -859,28 +913,18 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
again:
	mutex_lock(&gts->ts_ctxlock);
	preempt_disable();
	blade_id = uv_numa_blade_id();

	if (gts->ts_gru) {
		if (gts->ts_gru->gs_blade_id != blade_id) {
			STAT(migrated_nopfn_unload);
			gru_unload_context(gts, 1);
		} else {
			if (gru_retarget_intr(gts))
				STAT(migrated_nopfn_retarget);
		}
	}
	gru_check_context_placement(gts);

	if (!gts->ts_gru) {
		STAT(load_user_context);
		if (!gru_assign_gru_context(gts, blade_id)) {
		if (!gru_assign_gru_context(gts)) {
			preempt_enable();
			mutex_unlock(&gts->ts_ctxlock);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
			blade_id = uv_numa_blade_id();
			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
				gru_steal_context(gts, blade_id);
				gru_steal_context(gts);
			goto again;
		}
		gru_load_context(gts);
+2 −7
Original line number Diff line number Diff line
@@ -67,19 +67,14 @@ static int statistics_show(struct seq_file *s, void *p)
	printstat(s, intr);
	printstat(s, intr_mm_lock_failed);
	printstat(s, call_os);
	printstat(s, call_os_offnode_reference);
	printstat(s, call_os_check_for_bug);
	printstat(s, call_os_wait_queue);
	printstat(s, user_flush_tlb);
	printstat(s, user_unload_context);
	printstat(s, user_exception);
	printstat(s, set_context_option);
	printstat(s, migrate_check);
	printstat(s, migrated_retarget);
	printstat(s, migrated_unload);
	printstat(s, migrated_unload_delay);
	printstat(s, migrated_nopfn_retarget);
	printstat(s, migrated_nopfn_unload);
	printstat(s, check_context_retarget_intr);
	printstat(s, check_context_unload);
	printstat(s, tlb_dropin);
	printstat(s, tlb_dropin_fail_no_asid);
	printstat(s, tlb_dropin_fail_upm);
Loading