Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8b3d6663 authored by Arnd Bergmann, committed by Paul Mackerras
Browse files

[PATCH] spufs: cooperative scheduler support



This adds a scheduler for SPUs to make it possible to use
more logical SPUs than physical ones are present in the
system.

Currently, there is no support for preempting a running
SPU thread, they have to leave the SPU by either triggering
an event on the SPU that causes it to return to the
owning thread or by sending a signal to it.

This patch also adds operations that enable accessing an SPU
in either runnable or saved state. We use an RW semaphore
to protect the state of the SPU from changing underneath
us, while we are holding it readable. In order to change
the state, it is acquired writeable and a context save
or restore is executed before downgrading the semaphore
to read-only.

From: Mark Nutter <mnutter@us.ibm.com>,
      Uli Weigand <Ulrich.Weigand@de.ibm.com>
Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent 05b84117
Loading
Loading
Loading
Loading
+75 −0
Original line number Diff line number Diff line
@@ -68,6 +68,77 @@ void cell_show_cpuinfo(struct seq_file *m)
	of_node_put(root);
}

#ifdef CONFIG_SPARSEMEM
/* Look up the NUMA node id for an SPE device node.  Returns 0 when
 * the kernel is built without CONFIG_NUMA or the property is absent. */
static int __init find_spu_node_id(struct device_node *spe)
{
#ifdef CONFIG_NUMA
	/* The "node-id" property lives on the grandparent cpu node. */
	unsigned int *prop = (unsigned int *)
		get_property(spe->parent->parent, "node-id", NULL);

	if (prop)
		return *prop;
#endif
	return 0;
}

/* Register (early) or initialize (late) the page structs backing one
 * address-range property ("local-store", "problem", "priv1", "priv2")
 * of an SPE device node. */
static void __init cell_spuprop_present(struct device_node *spe,
				       const char *prop, int early)
{
	/* The property value is a packed pair: 64-bit physical address
	 * followed by a 32-bit length, with no padding. */
	struct address_prop {
		unsigned long address;
		unsigned int len;
	} __attribute__((packed)) *p;
	int proplen;

	unsigned long start_pfn, end_pfn, pfn;
	int node_id;

	p = (void*)get_property(spe, prop, &proplen);
	/* Bail out on a missing or malformed property instead of
	 * dereferencing a NULL or short buffer below (the previous
	 * WARN_ON-only check would still have dereferenced p). */
	if (!p || proplen != sizeof (*p)) {
		WARN_ON(1);
		return;
	}

	node_id = find_spu_node_id(spe);

	start_pfn = p->address >> PAGE_SHIFT;
	end_pfn = (p->address + p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* We need to call memory_present *before* the call to sparse_init,
	   but we can initialize the page structs only *after* that call.
	   Thus, we're being called twice. */
	if (early)
		memory_present(node_id, start_pfn, end_pfn);
	else {
		/* As the pages backing SPU LS and I/O are outside the range
		   of regular memory, their page structs were not initialized
		   by free_area_init. Do it here instead. */
		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
			struct page *page = pfn_to_page(pfn);
			set_page_links(page, ZONE_DMA, node_id, pfn);
			set_page_count(page, 0);
			reset_page_mapcount(page);
			SetPageReserved(page);
			INIT_LIST_HEAD(&page->lru);
		}
	}
}

/* Walk every "spe" device node and process the memory areas behind its
 * register and local-store properties.  Called twice: once with
 * early != 0 (from cell_init_early) and once with early == 0 (from
 * cell_setup_arch). */
static void __init cell_spumem_init(int early)
{
	struct device_node *node = NULL;

	while ((node = of_find_node_by_type(node, "spe")) != NULL) {
		cell_spuprop_present(node, "local-store", early);
		cell_spuprop_present(node, "problem", early);
		cell_spuprop_present(node, "priv1", early);
		cell_spuprop_present(node, "priv2", early);
	}
}
#else
static void __init cell_spumem_init(int early)
{
	/* Stub used when CONFIG_SPARSEMEM is not set; there are no
	 * SPU page structs to register or initialize in that case. */
}
#endif

static void cell_progress(char *s, unsigned short hex)
{
	printk("*** %04x : %s\n", hex, s ? s : "");
@@ -99,6 +170,8 @@ static void __init cell_setup_arch(void)
#endif

	mmio_nvram_init();

	cell_spumem_init(0);
}

/*
@@ -114,6 +187,8 @@ static void __init cell_init_early(void)

	ppc64_interrupt_controller = IC_CELL_PIC;

	cell_spumem_init(1);

	DBG(" <- cell_init_early()\n");
}

+76 −62
Original line number Diff line number Diff line
@@ -69,51 +69,49 @@ static void spu_restart_dma(struct spu *spu)

static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
{
	struct spu_priv2 __iomem *priv2;
	struct mm_struct *mm;
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	struct mm_struct *mm = spu->mm;
	u64 esid, vsid;

	pr_debug("%s\n", __FUNCTION__);

	if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE_nr, &spu->flags)) {
		/* SLBs are pre-loaded for context switch, so
		 * we should never get here!
		 */
		printk("%s: invalid access during switch!\n", __func__);
		return 1;
	}

	if (REGION_ID(ea) != USER_REGION_ID) {
	if (!mm || (REGION_ID(ea) != USER_REGION_ID)) {
		/* Future: support kernel segments so that drivers
		 * can use SPUs.
		 */
		pr_debug("invalid region access at %016lx\n", ea);
		return 1;
	}

	priv2 = spu->priv2;
	mm = spu->mm;
	esid = (ea & ESID_MASK) | SLB_ESID_V;
	vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | SLB_VSID_USER;
	if (in_hugepage_area(mm->context, ea))
		vsid |= SLB_VSID_L;

	out_be64(&priv2->slb_index_W, spu->slb_replace);
	out_be64(&priv2->slb_vsid_RW, vsid);
	out_be64(&priv2->slb_esid_RW, esid);

	spu->slb_replace++;
	if (spu->slb_replace >= 8)
		spu->slb_replace = 0;

	out_be64(&priv2->slb_index_W, spu->slb_replace);
	out_be64(&priv2->slb_vsid_RW,
		(get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT)
						 | SLB_VSID_USER);
	out_be64(&priv2->slb_esid_RW, (ea & ESID_MASK) | SLB_ESID_V);

	spu_restart_dma(spu);

	pr_debug("set slb %d context %lx, ea %016lx, vsid %016lx, esid %016lx\n",
		spu->slb_replace, mm->context.id, ea,
		(get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT)| SLB_VSID_USER,
		 (ea & ESID_MASK) | SLB_ESID_V);
	return 0;
}

extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
static int __spu_trap_data_map(struct spu *spu, unsigned long ea)
static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
{
	unsigned long dsisr;
	struct spu_priv1 __iomem *priv1;

	pr_debug("%s\n", __FUNCTION__);
	priv1 = spu->priv1;
	dsisr = in_be64(&priv1->mfc_dsisr_RW);

	/* Handle kernel space hash faults immediately.
	   User hash faults need to be deferred to process context. */
@@ -129,14 +127,17 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea)
		return 1;
	}

	spu->dar = ea;
	spu->dsisr = dsisr;
	mb();
	wake_up(&spu->stop_wq);
	return 0;
}

static int __spu_trap_mailbox(struct spu *spu)
{
	wake_up_all(&spu->ibox_wq);
	kill_fasync(&spu->ibox_fasync, SIGIO, POLLIN);
	if (spu->ibox_callback)
		spu->ibox_callback(spu);

	/* atomically disable SPU mailbox interrupts */
	spin_lock(&spu->register_lock);
@@ -171,8 +172,8 @@ static int __spu_trap_tag_group(struct spu *spu)

static int __spu_trap_spubox(struct spu *spu)
{
	wake_up_all(&spu->wbox_wq);
	kill_fasync(&spu->wbox_fasync, SIGIO, POLLOUT);
	if (spu->wbox_callback)
		spu->wbox_callback(spu);

	/* atomically disable SPU mailbox interrupts */
	spin_lock(&spu->register_lock);
@@ -220,17 +221,25 @@ static irqreturn_t
spu_irq_class_1(int irq, void *data, struct pt_regs *regs)
{
	struct spu *spu;
	unsigned long stat, dar;
	unsigned long stat, mask, dar, dsisr;

	spu = data;
	stat  = in_be64(&spu->priv1->int_stat_class1_RW);

	/* atomically read & clear class1 status. */
	spin_lock(&spu->register_lock);
	mask  = in_be64(&spu->priv1->int_mask_class1_RW);
	stat  = in_be64(&spu->priv1->int_stat_class1_RW) & mask;
	dar   = in_be64(&spu->priv1->mfc_dar_RW);
	dsisr = in_be64(&spu->priv1->mfc_dsisr_RW);
	out_be64(&spu->priv1->mfc_dsisr_RW, 0UL);
	out_be64(&spu->priv1->int_stat_class1_RW, stat);
	spin_unlock(&spu->register_lock);

	if (stat & 1) /* segment fault */
		__spu_trap_data_seg(spu, dar);

	if (stat & 2) { /* mapping fault */
		__spu_trap_data_map(spu, dar);
		__spu_trap_data_map(spu, dar, dsisr);
	}

	if (stat & 4) /* ls compare & suspend on get */
@@ -239,7 +248,6 @@ spu_irq_class_1(int irq, void *data, struct pt_regs *regs)
	if (stat & 8) /* ls compare & suspend on put */
		;

	out_be64(&spu->priv1->int_stat_class1_RW, stat);
	return stat ? IRQ_HANDLED : IRQ_NONE;
}

@@ -396,8 +404,6 @@ EXPORT_SYMBOL(spu_alloc);
void spu_free(struct spu *spu)
{
	down(&spu_mutex);
	spu->ibox_fasync = NULL;
	spu->wbox_fasync = NULL;
	list_add_tail(&spu->list, &spu_list);
	up(&spu_mutex);
}
@@ -405,15 +411,13 @@ EXPORT_SYMBOL(spu_free);

static int spu_handle_mm_fault(struct spu *spu)
{
	struct spu_priv1 __iomem *priv1;
	struct mm_struct *mm = spu->mm;
	struct vm_area_struct *vma;
	u64 ea, dsisr, is_write;
	int ret;

	priv1 = spu->priv1;
	ea = in_be64(&priv1->mfc_dar_RW);
	dsisr = in_be64(&priv1->mfc_dsisr_RW);
	ea = spu->dar;
	dsisr = spu->dsisr;
#if 0
	if (!IS_VALID_EA(ea)) {
		return -EFAULT;
@@ -476,15 +480,14 @@ static int spu_handle_mm_fault(struct spu *spu)

static int spu_handle_pte_fault(struct spu *spu)
{
	struct spu_priv1 __iomem *priv1;
	u64 ea, dsisr, access, error = 0UL;
	int ret = 0;

	priv1 = spu->priv1;
	ea = in_be64(&priv1->mfc_dar_RW);
	dsisr = in_be64(&priv1->mfc_dsisr_RW);
	access = (_PAGE_PRESENT | _PAGE_USER);
	ea = spu->dar;
	dsisr = spu->dsisr;
	if (dsisr & MFC_DSISR_PTE_NOT_FOUND) {
		access = (_PAGE_PRESENT | _PAGE_USER);
		access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
		if (hash_page(ea, access, 0x300) != 0)
			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
	}
@@ -495,18 +498,33 @@ static int spu_handle_pte_fault(struct spu *spu)
		else
			error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
	}
	if (!error)
	spu->dar = 0UL;
	spu->dsisr = 0UL;
	if (!error) {
		spu_restart_dma(spu);

	} else {
		__spu_trap_invalid_dma(spu);
	}
	return ret;
}

static inline int spu_pending(struct spu *spu, u32 * stat)
{
	struct spu_problem __iomem *prob = spu->problem;
	u64 pte_fault;

	*stat = in_be32(&prob->spu_status_R);
	pte_fault = spu->dsisr &
		    (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
	return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0;
}

int spu_run(struct spu *spu)
{
	struct spu_problem __iomem *prob;
	struct spu_priv1 __iomem *priv1;
	struct spu_priv2 __iomem *priv2;
	unsigned long status;
	u32 status;
	int ret;

	prob = spu->problem;
@@ -514,21 +532,15 @@ int spu_run(struct spu *spu)
	priv2 = spu->priv2;

	/* Let SPU run.  */
	spu->mm = current->mm;
	eieio();
	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);

	do {
		ret = wait_event_interruptible(spu->stop_wq,
			 (!((status = in_be32(&prob->spu_status_R)) & 0x1))
			|| (in_be64(&priv1->mfc_dsisr_RW) & MFC_DSISR_PTE_NOT_FOUND)
			|| spu->class_0_pending);

		if (status & SPU_STATUS_STOPPED_BY_STOP)
			ret = -EAGAIN;
		else if (status & SPU_STATUS_STOPPED_BY_HALT)
			ret = -EIO;
		else if (in_be64(&priv1->mfc_dsisr_RW) & MFC_DSISR_PTE_NOT_FOUND)
					       spu_pending(spu, &status));

		if (spu->dsisr &
		    (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
			ret = spu_handle_pte_fault(spu);

		if (spu->class_0_pending)
@@ -537,7 +549,9 @@ int spu_run(struct spu *spu)
		if (!ret && signal_pending(current))
			ret = -ERESTARTSYS;

	} while (!ret);
	} while (!ret && !(status &
			   (SPU_STATUS_STOPPED_BY_STOP |
			    SPU_STATUS_STOPPED_BY_HALT)));

	/* Ensure SPU is stopped.  */
	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
@@ -549,8 +563,6 @@ int spu_run(struct spu *spu)
	out_be64(&priv1->tlb_invalidate_entry_W, 0UL);
	eieio();

	spu->mm = NULL;

	/* Check for SPU breakpoint.  */
	if (unlikely(current->ptrace & PT_PTRACED)) {
		status = in_be32(&prob->spu_status_R);
@@ -669,19 +681,21 @@ static int __init create_spu(struct device_node *spe)
	spu->stop_code = 0;
	spu->slb_replace = 0;
	spu->mm = NULL;
	spu->ctx = NULL;
	spu->rq = NULL;
	spu->pid = 0;
	spu->class_0_pending = 0;
	spu->flags = 0UL;
	spu->dar = 0UL;
	spu->dsisr = 0UL;
	spin_lock_init(&spu->register_lock);

	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
	out_be64(&spu->priv1->mfc_sr1_RW, 0x33);

	init_waitqueue_head(&spu->stop_wq);
	init_waitqueue_head(&spu->wbox_wq);
	init_waitqueue_head(&spu->ibox_wq);

	spu->ibox_fasync = NULL;
	spu->wbox_fasync = NULL;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;

	down(&spu_mutex);
	spu->number = number++;
+1 −1
Original line number Diff line number Diff line
obj-$(CONFIG_SPU_FS) += spufs.o

spufs-y += inode.o file.o context.o switch.o syscalls.o
spufs-y += sched.o backing_ops.o hw_ops.o

# Rules to build switch.o with the help of SPU tool chain
SPU_CROSS	:= spu-
+252 −0
Original line number Diff line number Diff line
/* backing_ops.c - query/set operations on saved SPU context.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * These register operations allow SPUFS to operate on saved
 * SPU contexts rather than hardware.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>

#include <asm/io.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/mmu_context.h>
#include "spufs.h"

/*
 * Reads/writes to various problem and priv2 registers require
 * state changes, i.e.  generate SPU events, modify channel
 * counts, etc.
 */

/* Raise an SPU event in the saved context image.  Sets the event bit
 * in channel 0 data and, when the event is newly raised and enabled
 * (channel 1 appears to act as the event mask — matches the checks
 * below), makes channel 0 readable by setting its count to 1. */
static void gen_spu_event(struct spu_context *ctx, u32 event)
{
	u64 cnt = ctx->csa.spu_chnlcnt_RW[0];
	u64 stat = ctx->csa.spu_chnldata_RW[0];
	u64 mask = ctx->csa.spu_chnldata_RW[1];

	ctx->csa.spu_chnldata_RW[0] |= event;
	if (cnt == 0 && !(stat & event) && (mask & event))
		ctx->csa.spu_chnlcnt_RW[0] = 1;
}

/* Read one word from the saved PU mailbox.  Returns the number of
 * bytes read (4) or 0 when the mailbox is empty. */
static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
{
	int nread = 0;

	spin_lock(&ctx->csa.register_lock);
	if (ctx->csa.prob.mb_stat_R & 0x0000ff) {
		/* Read the first available word.
		 * Implementation note: the depth
		 * of pu_mb_R is currently 1.
		 */
		*data = ctx->csa.prob.pu_mb_R;
		ctx->csa.prob.mb_stat_R &= ~(0x0000ff);
		ctx->csa.spu_chnlcnt_RW[28] = 1;
		gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
		nread = 4;
	}
	spin_unlock(&ctx->csa.register_lock);
	return nread;
}

/* Return the saved mailbox status register.
 * NOTE(review): read without taking csa.register_lock, unlike the
 * other accessors in this file — confirm a stale snapshot is OK. */
static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
{
	return ctx->csa.prob.mb_stat_R;
}

/* Read one word from the saved interrupting (SPU->PU) mailbox.
 * Returns 4 on success; 0 when empty, after arming the class 2
 * interrupt mask so the reader is woken once data arrives. */
static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
{
	int nread;

	spin_lock(&ctx->csa.register_lock);
	if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) {
		/* make sure we get woken up by the interrupt */
		ctx->csa.priv1.int_mask_class2_RW |= 0x1UL;
		nread = 0;
	} else {
		/* Read the first available word.
		 * Implementation note: the depth
		 * of puint_mb_R is currently 1.
		 */
		*data = ctx->csa.priv2.puint_mb_R;
		ctx->csa.prob.mb_stat_R &= ~(0xff0000);
		ctx->csa.spu_chnlcnt_RW[30] = 1;
		gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
		nread = 4;
	}
	spin_unlock(&ctx->csa.register_lock);
	return nread;
}

/* Write one word into the saved PU->SPU mailbox.  Returns 4 on
 * success; 0 when full, after arming the class 2 interrupt mask so
 * the writer is woken when space becomes available. */
static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
{
	int nwritten = 0;

	spin_lock(&ctx->csa.register_lock);
	if (ctx->csa.prob.mb_stat_R & 0x00ff00) {
		int slot = ctx->csa.spu_chnlcnt_RW[29];
		int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8;

		/* We have space to write wbox_data.
		 * Implementation note: the depth
		 * of spu_mb_W is currently 4.
		 */
		BUG_ON(avail != (4 - slot));
		ctx->csa.spu_mailbox_data[slot] = data;
		slot++;
		ctx->csa.spu_chnlcnt_RW[29] = slot;
		ctx->csa.prob.mb_stat_R = (((4 - slot) & 0xff) << 8);
		gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
		nwritten = 4;
	} else {
		/* make sure we get woken up by the interrupt when space
		   becomes available */
		ctx->csa.priv1.int_mask_class2_RW |= 0x10;
	}
	spin_unlock(&ctx->csa.register_lock);
	return nwritten;
}

/* Return the saved signal notification 1 value (channel 3 data,
 * truncated to 32 bits by the return type). */
static u32 spu_backing_signal1_read(struct spu_context *ctx)
{
	return ctx->csa.spu_chnldata_RW[3];
}

/* Write to saved signal notification 1 (channel 3).  Bit 0 of
 * spu_cfg_RW selects OR-accumulate mode; otherwise the value is
 * overwritten.  The channel is then marked readable. */
static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
{
	u64 val = data;

	spin_lock(&ctx->csa.register_lock);
	if (ctx->csa.priv2.spu_cfg_RW & 0x1)
		val |= ctx->csa.spu_chnldata_RW[3];
	ctx->csa.spu_chnldata_RW[3] = val;
	ctx->csa.spu_chnlcnt_RW[3] = 1;
	gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
	spin_unlock(&ctx->csa.register_lock);
}

/* Return the saved signal notification 2 value (channel 4 data,
 * truncated to 32 bits by the return type). */
static u32 spu_backing_signal2_read(struct spu_context *ctx)
{
	return ctx->csa.spu_chnldata_RW[4];
}

/* Write to saved signal notification 2 (channel 4).  Bit 1 of
 * spu_cfg_RW selects OR-accumulate mode; otherwise the value is
 * overwritten.  The channel is then marked readable. */
static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
{
	u64 val = data;

	spin_lock(&ctx->csa.register_lock);
	if (ctx->csa.priv2.spu_cfg_RW & 0x2)
		val |= ctx->csa.spu_chnldata_RW[4];
	ctx->csa.spu_chnldata_RW[4] = val;
	ctx->csa.spu_chnlcnt_RW[4] = 1;
	gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
	spin_unlock(&ctx->csa.register_lock);
}

/* Set (val != 0) or clear bit 0 of the saved spu_cfg register, which
 * selects the logical/OR mode for signal notification 1. */
static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
{
	spin_lock(&ctx->csa.register_lock);
	if (val)
		ctx->csa.priv2.spu_cfg_RW |= 1;
	else
		ctx->csa.priv2.spu_cfg_RW &= ~1;
	spin_unlock(&ctx->csa.register_lock);
}

/* Return 1 if bit 0 (signal 1 logical/OR mode) is set in the saved
 * spu_cfg register, else 0. */
static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
{
	return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0);
}

/* Set (val != 0) or clear bit 1 of the saved spu_cfg register, which
 * selects the logical/OR mode for signal notification 2. */
static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
{
	spin_lock(&ctx->csa.register_lock);
	if (val)
		ctx->csa.priv2.spu_cfg_RW |= 2;
	else
		ctx->csa.priv2.spu_cfg_RW &= ~2;
	spin_unlock(&ctx->csa.register_lock);
}

/* Return 1 if bit 1 (signal 2 logical/OR mode) is set in the saved
 * spu_cfg register, else 0. */
static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
{
	return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0);
}

/* Read the saved next-program-counter register. */
static u32 spu_backing_npc_read(struct spu_context *ctx)
{
	return ctx->csa.prob.spu_npc_RW;
}

/* Write the saved next-program-counter register. */
static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
{
	ctx->csa.prob.spu_npc_RW = val;
}

/* Read the saved SPU status register. */
static u32 spu_backing_status_read(struct spu_context *ctx)
{
	return ctx->csa.prob.spu_status_R;
}

/* Return a pointer to the saved copy of the local store. */
static char *spu_backing_get_ls(struct spu_context *ctx)
{
	return ctx->csa.lscsa->ls;
}

/* Operations used while a context is in saved state: every hook works
 * on the context save area (csa) instead of SPU hardware, mirroring
 * the interface of the hardware-backed ops. */
struct spu_context_ops spu_backing_ops = {
	.mbox_read = spu_backing_mbox_read,
	.mbox_stat_read = spu_backing_mbox_stat_read,
	.ibox_read = spu_backing_ibox_read,
	.wbox_write = spu_backing_wbox_write,
	.signal1_read = spu_backing_signal1_read,
	.signal1_write = spu_backing_signal1_write,
	.signal2_read = spu_backing_signal2_read,
	.signal2_write = spu_backing_signal2_write,
	.signal1_type_set = spu_backing_signal1_type_set,
	.signal1_type_get = spu_backing_signal1_type_get,
	.signal2_type_set = spu_backing_signal2_type_set,
	.signal2_type_get = spu_backing_signal2_type_get,
	.npc_read = spu_backing_npc_read,
	.npc_write = spu_backing_npc_write,
	.status_read = spu_backing_status_read,
	.get_ls = spu_backing_get_ls,
};
+96 −18
Original line number Diff line number Diff line
@@ -20,39 +20,38 @@
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include "spufs.h"

struct spu_context *alloc_spu_context(void)
struct spu_context *alloc_spu_context(struct address_space *local_store)
{
	struct spu_context *ctx;
	ctx = kmalloc(sizeof *ctx, GFP_KERNEL);
	if (!ctx)
		goto out;
	/* Future enhancement: do not call spu_alloc()
	 * here.  This step should be deferred until
	 * spu_run()!!
	 *
	 * More work needs to be done to read(),
	 * write(), mmap(), etc., so that operations
	 * are performed on CSA when the context is
	 * not currently being run.  In this way we
	 * can support arbitrarily large number of
	 * entries in /spu, allow state queries, etc.
	/* Binding to physical processor deferred
	 * until spu_activate().
	 */
	ctx->spu = spu_alloc();
	if (!ctx->spu)
		goto out_free;
	spu_init_csa(&ctx->csa);
	if (!ctx->csa.lscsa) {
		spu_free(ctx->spu);
		goto out_free;
	}
	init_rwsem(&ctx->backing_sema);
	spin_lock_init(&ctx->mmio_lock);
	kref_init(&ctx->kref);
	init_rwsem(&ctx->state_sema);
	init_waitqueue_head(&ctx->ibox_wq);
	init_waitqueue_head(&ctx->wbox_wq);
	ctx->ibox_fasync = NULL;
	ctx->wbox_fasync = NULL;
	ctx->state = SPU_STATE_SAVED;
	ctx->local_store = local_store;
	ctx->spu = NULL;
	ctx->ops = &spu_backing_ops;
	ctx->owner = get_task_mm(current);
	goto out;
out_free:
	kfree(ctx);
@@ -65,8 +64,11 @@ void destroy_spu_context(struct kref *kref)
{
	struct spu_context *ctx;
	ctx = container_of(kref, struct spu_context, kref);
	if (ctx->spu)
		spu_free(ctx->spu);
	down_write(&ctx->state_sema);
	spu_deactivate(ctx);
	ctx->ibox_fasync = NULL;
	ctx->wbox_fasync = NULL;
	up_write(&ctx->state_sema);
	spu_fini_csa(&ctx->csa);
	kfree(ctx);
}
@@ -82,4 +84,80 @@ int put_spu_context(struct spu_context *ctx)
	return kref_put(&ctx->kref, &destroy_spu_context);
}

/* give up the mm reference when the context is about to be destroyed */
void spu_forget(struct spu_context *ctx)
{
	struct mm_struct *mm;
	/* Force the context into saved state first so no SPU can still
	 * be using the mm while we drop the reference. */
	spu_acquire_saved(ctx);
	mm = ctx->owner;
	ctx->owner = NULL;
	/* NOTE(review): assumes ctx->owner was still set here —
	 * mmput(NULL) would oops; confirm callers invoke this once. */
	mmput(mm);
	spu_release(ctx);
}

/* Hold the context state stable (read side of state_sema) across an
 * operation on the context. */
void spu_acquire(struct spu_context *ctx)
{
	down_read(&ctx->state_sema);
}

/* Drop the read-side hold taken by spu_acquire*(). */
void spu_release(struct spu_context *ctx)
{
	up_read(&ctx->state_sema);
}

/* Tear down all user mappings of the local store so later accesses
 * fault and can be re-resolved after the context changes state.
 * NOTE(review): assumes ctx->local_store is always valid here —
 * confirm against alloc_spu_context() callers. */
static void spu_unmap_mappings(struct spu_context *ctx)
{
	unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
}

/* Ensure the context is bound to a physical SPU.  On success (return
 * 0) the caller holds state_sema for reading; on error it is dropped. */
int spu_acquire_runnable(struct spu_context *ctx)
{
	int ret = 0;

	/* Fast path: already runnable — return with the read lock held. */
	down_read(&ctx->state_sema);
	if (ctx->state == SPU_STATE_RUNNABLE)
		return 0;
	/* ctx is about to be freed, can't acquire any more */
	if (!ctx->owner) {
		ret = -EINVAL;
		goto out;
	}
	up_read(&ctx->state_sema);

	/* Slow path: retake the semaphore writable to change state.
	 * The state may have changed between the two acquisitions,
	 * hence the recheck against SPU_STATE_SAVED below. */
	down_write(&ctx->state_sema);
	if (ctx->state == SPU_STATE_SAVED) {
		spu_unmap_mappings(ctx);
		ret = spu_activate(ctx, 0);
		/* NOTE(review): state is marked RUNNABLE even if
		 * spu_activate() returned an error — confirm intended. */
		ctx->state = SPU_STATE_RUNNABLE;
	}
	/* downgrade_write leaves us holding the lock for reading. */
	downgrade_write(&ctx->state_sema);
	if (ret)
		goto out;

	/* On success, we return holding the lock */
	return ret;
out:
	/* Release here, to simplify calling code. */
	up_read(&ctx->state_sema);

	return ret;
}

/* Ensure the context is in saved state (unbound from hardware).
 * Always returns holding state_sema for reading. */
void spu_acquire_saved(struct spu_context *ctx)
{
	/* Fast path: already saved — keep the read lock and return. */
	down_read(&ctx->state_sema);

	if (ctx->state == SPU_STATE_SAVED)
		return;

	/* Upgrade to the write side so the context can be deactivated;
	 * recheck the state since it may have changed in between. */
	up_read(&ctx->state_sema);
	down_write(&ctx->state_sema);

	if (ctx->state == SPU_STATE_RUNNABLE) {
		spu_unmap_mappings(ctx);
		spu_deactivate(ctx);
		ctx->state = SPU_STATE_SAVED;
	}

	/* Hand the lock back to the caller in read mode. */
	downgrade_write(&ctx->state_sema);
}
Loading