Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cbacdd95 authored by Dimitri Sivanich's avatar Dimitri Sivanich Committed by Linus Torvalds
Browse files

SGI Altix mmtimer: allow larger number of timers per node



The purpose of this patch to the SGI Altix specific mmtimer (posix timer)
driver is to allow a virtually infinite number of timers to be set per
node.

Timers will now be kept on a sorted per-node list and a single node-based
hardware comparator is used to trigger the next timer.

[akpm@linux-foundation.org: mark things static]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: default avatarDimitri Sivanich <sivanich@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent d17468c7
Loading
Loading
Loading
Loading
+244 −156
Original line number Diff line number Diff line
@@ -74,9 +74,8 @@ static const struct file_operations mmtimer_fops = {
 * We only have comparison registers RTC1-4 currently available per
 * node.  RTC0 is used by SAL.
 */
#define NUM_COMPARATORS 3
/* Check for an RTC interrupt pending */
static int inline mmtimer_int_pending(int comparator)
static int mmtimer_int_pending(int comparator)
{
	if (HUB_L((unsigned long *)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)) &
			SH_EVENT_OCCURRED_RTC1_INT_MASK << comparator)
@@ -84,15 +83,16 @@ static int inline mmtimer_int_pending(int comparator)
	else
		return 0;
}

/* Clear the RTC interrupt pending bit */
static void inline mmtimer_clr_int_pending(int comparator)
static void mmtimer_clr_int_pending(int comparator)
{
	HUB_S((u64 *)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS),
		SH_EVENT_OCCURRED_RTC1_INT_MASK << comparator);
}

/* Setup timer on comparator RTC1 */
static void inline mmtimer_setup_int_0(u64 expires)
static void mmtimer_setup_int_0(int cpu, u64 expires)
{
	u64 val;

@@ -106,7 +106,7 @@ static void inline mmtimer_setup_int_0(u64 expires)
	mmtimer_clr_int_pending(0);

	val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC1_INT_CONFIG_IDX_SHFT) |
		((u64)cpu_physical_id(smp_processor_id()) <<
		((u64)cpu_physical_id(cpu) <<
			SH_RTC1_INT_CONFIG_PID_SHFT);

	/* Set configuration */
@@ -122,7 +122,7 @@ static void inline mmtimer_setup_int_0(u64 expires)
}

/* Setup timer on comparator RTC2 */
static void inline mmtimer_setup_int_1(u64 expires)
static void mmtimer_setup_int_1(int cpu, u64 expires)
{
	u64 val;

@@ -133,7 +133,7 @@ static void inline mmtimer_setup_int_1(u64 expires)
	mmtimer_clr_int_pending(1);

	val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC2_INT_CONFIG_IDX_SHFT) |
		((u64)cpu_physical_id(smp_processor_id()) <<
		((u64)cpu_physical_id(cpu) <<
			SH_RTC2_INT_CONFIG_PID_SHFT);

	HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC2_INT_CONFIG), val);
@@ -144,7 +144,7 @@ static void inline mmtimer_setup_int_1(u64 expires)
}

/* Setup timer on comparator RTC3 */
static void inline mmtimer_setup_int_2(u64 expires)
static void mmtimer_setup_int_2(int cpu, u64 expires)
{
	u64 val;

@@ -155,7 +155,7 @@ static void inline mmtimer_setup_int_2(u64 expires)
	mmtimer_clr_int_pending(2);

	val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC3_INT_CONFIG_IDX_SHFT) |
		((u64)cpu_physical_id(smp_processor_id()) <<
		((u64)cpu_physical_id(cpu) <<
			SH_RTC3_INT_CONFIG_PID_SHFT);

	HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC3_INT_CONFIG), val);
@@ -170,22 +170,22 @@ static void inline mmtimer_setup_int_2(u64 expires)
 * in order to insure that the setup succeeds in a deterministic time frame.
 * It will check if the interrupt setup succeeded.
 */
static int inline mmtimer_setup(int comparator, unsigned long expires)
static int mmtimer_setup(int cpu, int comparator, unsigned long expires)
{

	switch (comparator) {
	case 0:
		mmtimer_setup_int_0(expires);
		mmtimer_setup_int_0(cpu, expires);
		break;
	case 1:
		mmtimer_setup_int_1(expires);
		mmtimer_setup_int_1(cpu, expires);
		break;
	case 2:
		mmtimer_setup_int_2(expires);
		mmtimer_setup_int_2(cpu, expires);
		break;
	}
	/* We might've missed our expiration time */
	if (rtc_time() < expires)
	if (rtc_time() <= expires)
		return 1;

	/*
@@ -195,7 +195,7 @@ static int inline mmtimer_setup(int comparator, unsigned long expires)
	return mmtimer_int_pending(comparator);
}

static int inline mmtimer_disable_int(long nasid, int comparator)
static int mmtimer_disable_int(long nasid, int comparator)
{
	switch (comparator) {
	case 0:
@@ -216,18 +216,124 @@ static int inline mmtimer_disable_int(long nasid, int comparator)
	return 0;
}

#define TIMER_OFF 0xbadcabLL
#define COMPARATOR	1		/* The comparator to use */

/* There is one of these for each comparator */
typedef struct mmtimer {
	spinlock_t lock ____cacheline_aligned;
#define TIMER_OFF	0xbadcabLL	/* Timer is not setup */
#define TIMER_SET	0		/* Comparator is set for this timer */

/* There is one of these for each timer */
struct mmtimer {
	struct rb_node list;
	struct k_itimer *timer;
	int i;
	int cpu;
};

struct mmtimer_node {
	spinlock_t lock ____cacheline_aligned;
	struct rb_root timer_head;
	struct rb_node *next;
	struct tasklet_struct tasklet;
} mmtimer_t;
};
static struct mmtimer_node *timers;


/*
 * Add a new mmtimer struct to the node's mmtimer list.
 * This function assumes the struct mmtimer_node is locked.
 */
static void mmtimer_add_list(struct mmtimer *n)
{
	int nodeid = n->timer->it.mmtimer.node;
	unsigned long expires = n->timer->it.mmtimer.expires;
	struct rb_node **link = &timers[nodeid].timer_head.rb_node;
	struct rb_node *parent = NULL;
	struct mmtimer *x;

	/*
	 * Find the right place in the rbtree:
	 */
	while (*link) {
		parent = *link;
		x = rb_entry(parent, struct mmtimer, list);

		if (expires < x->timer->it.mmtimer.expires)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	/*
	 * Insert the timer to the rbtree and check whether it
	 * replaces the first pending timer
	 */
	rb_link_node(&n->list, parent, link);
	rb_insert_color(&n->list, &timers[nodeid].timer_head);

static mmtimer_t ** timers;
	if (!timers[nodeid].next || expires < rb_entry(timers[nodeid].next,
			struct mmtimer, list)->timer->it.mmtimer.expires)
		timers[nodeid].next = &n->list;
}

/*
 * Set the comparator for the next timer.
 * This function assumes the struct mmtimer_node is locked.
 */
static void mmtimer_set_next_timer(int nodeid)
{
	struct mmtimer_node *n = &timers[nodeid];
	struct mmtimer *x;
	struct k_itimer *t;
	int o;

restart:
	if (n->next == NULL)
		return;

	x = rb_entry(n->next, struct mmtimer, list);
	t = x->timer;
	if (!t->it.mmtimer.incr) {
		/* Not an interval timer */
		if (!mmtimer_setup(x->cpu, COMPARATOR,
					t->it.mmtimer.expires)) {
			/* Late setup, fire now */
			tasklet_schedule(&n->tasklet);
		}
		return;
	}

	/* Interval timer */
	o = 0;
	while (!mmtimer_setup(x->cpu, COMPARATOR, t->it.mmtimer.expires)) {
		unsigned long e, e1;
		struct rb_node *next;
		t->it.mmtimer.expires += t->it.mmtimer.incr << o;
		t->it_overrun += 1 << o;
		o++;
		if (o > 20) {
			printk(KERN_ALERT "mmtimer: cannot reschedule timer\n");
			t->it.mmtimer.clock = TIMER_OFF;
			n->next = rb_next(&x->list);
			rb_erase(&x->list, &n->timer_head);
			kfree(x);
			goto restart;
		}

		e = t->it.mmtimer.expires;
		next = rb_next(&x->list);

		if (next == NULL)
			continue;

		e1 = rb_entry(next, struct mmtimer, list)->
			timer->it.mmtimer.expires;
		if (e > e1) {
			n->next = next;
			rb_erase(&x->list, &n->timer_head);
			mmtimer_add_list(x);
			goto restart;
		}
	}
}

/**
 * mmtimer_ioctl - ioctl interface for /dev/mmtimer
@@ -390,35 +496,6 @@ static int sgi_clock_set(clockid_t clockid, struct timespec *tp)
	return 0;
}

/*
 * Schedule the next periodic interrupt. This function will attempt
 * to schedule a periodic interrupt later if necessary. If the scheduling
 * of an interrupt fails then the time to skip is lengthened
 * exponentially in order to ensure that the next interrupt
 * can be properly scheduled..
 */
static int inline reschedule_periodic_timer(mmtimer_t *x)
{
	int n;
	struct k_itimer *t = x->timer;

	t->it.mmtimer.clock = x->i;
	t->it_overrun--;

	n = 0;
	do {

		t->it.mmtimer.expires += t->it.mmtimer.incr << n;
		t->it_overrun += 1 << n;
		n++;
		if (n > 20)
			return 1;

	} while (!mmtimer_setup(x->i, t->it.mmtimer.expires));

	return 0;
}

/**
 * mmtimer_interrupt - timer interrupt handler
 * @irq: irq received
@@ -435,71 +512,75 @@ static int inline reschedule_periodic_timer(mmtimer_t *x)
static irqreturn_t
mmtimer_interrupt(int irq, void *dev_id)
{
	int i;
	unsigned long expires = 0;
	int result = IRQ_NONE;
	unsigned indx = cpu_to_node(smp_processor_id());
	struct mmtimer *base;

	spin_lock(&timers[indx].lock);
	base = rb_entry(timers[indx].next, struct mmtimer, list);
	if (base == NULL) {
		spin_unlock(&timers[indx].lock);
		return result;
	}

	/*
	 * Do this once for each comparison register
	 */
	for (i = 0; i < NUM_COMPARATORS; i++) {
		mmtimer_t *base = timers[indx] + i;
		/* Make sure this doesn't get reused before tasklet_sched */
		spin_lock(&base->lock);
	if (base->cpu == smp_processor_id()) {
		if (base->timer)
			expires = base->timer->it.mmtimer.expires;
		/* expires test won't work with shared irqs */
			if ((mmtimer_int_pending(i) > 0) ||
				(expires && (expires < rtc_time()))) {
				mmtimer_clr_int_pending(i);
				tasklet_schedule(&base->tasklet);
		if ((mmtimer_int_pending(COMPARATOR) > 0) ||
			(expires && (expires <= rtc_time()))) {
			mmtimer_clr_int_pending(COMPARATOR);
			tasklet_schedule(&timers[indx].tasklet);
			result = IRQ_HANDLED;
		}
	}
		spin_unlock(&base->lock);
		expires = 0;
	}
	spin_unlock(&timers[indx].lock);
	return result;
}

void mmtimer_tasklet(unsigned long data) {
	mmtimer_t *x = (mmtimer_t *)data;
	struct k_itimer *t = x->timer;
static void mmtimer_tasklet(unsigned long data)
{
	int nodeid = data;
	struct mmtimer_node *mn = &timers[nodeid];
	struct mmtimer *x = rb_entry(mn->next, struct mmtimer, list);
	struct k_itimer *t;
	unsigned long flags;

	if (t == NULL)
		return;

	/* Send signal and deal with periodic signals */
	spin_lock_irqsave(&t->it_lock, flags);
	spin_lock(&x->lock);
	/* If timer was deleted between interrupt and here, leave */
	if (t != x->timer)
	spin_lock_irqsave(&mn->lock, flags);
	if (!mn->next)
		goto out;
	t->it_overrun = 0;

	if (posix_timer_event(t, 0) != 0) {
	x = rb_entry(mn->next, struct mmtimer, list);
	t = x->timer;

		// printk(KERN_WARNING "mmtimer: cannot deliver signal.\n");
	if (t->it.mmtimer.clock == TIMER_OFF)
		goto out;

	t->it_overrun = 0;

	mn->next = rb_next(&x->list);
	rb_erase(&x->list, &mn->timer_head);

	if (posix_timer_event(t, 0) != 0)
		t->it_overrun++;
	}

	if(t->it.mmtimer.incr) {
		/* Periodic timer */
		if (reschedule_periodic_timer(x)) {
			printk(KERN_WARNING "mmtimer: unable to reschedule\n");
			x->timer = NULL;
		}
		t->it.mmtimer.expires += t->it.mmtimer.incr;
		mmtimer_add_list(x);
	} else {
		/* Ensure we don't false trigger in mmtimer_interrupt */
		t->it.mmtimer.clock = TIMER_OFF;
		t->it.mmtimer.expires = 0;
		kfree(x);
	}
	/* Set comparator for next timer, if there is one */
	mmtimer_set_next_timer(nodeid);

	t->it_overrun_last = t->it_overrun;
out:
	spin_unlock(&x->lock);
	spin_unlock_irqrestore(&t->it_lock, flags);
	spin_unlock_irqrestore(&mn->lock, flags);
}

static int sgi_timer_create(struct k_itimer *timer)
@@ -516,19 +597,50 @@ static int sgi_timer_create(struct k_itimer *timer)
 */
static int sgi_timer_del(struct k_itimer *timr)
{
	int i = timr->it.mmtimer.clock;
	cnodeid_t nodeid = timr->it.mmtimer.node;
	mmtimer_t *t = timers[nodeid] + i;
	unsigned long irqflags;

	if (i != TIMER_OFF) {
		spin_lock_irqsave(&t->lock, irqflags);
		mmtimer_disable_int(cnodeid_to_nasid(nodeid),i);
		t->timer = NULL;
	spin_lock_irqsave(&timers[nodeid].lock, irqflags);
	if (timr->it.mmtimer.clock != TIMER_OFF) {
		unsigned long expires = timr->it.mmtimer.expires;
		struct rb_node *n = timers[nodeid].timer_head.rb_node;
		struct mmtimer *uninitialized_var(t);
		int r = 0;

		timr->it.mmtimer.clock = TIMER_OFF;
		timr->it.mmtimer.expires = 0;
		spin_unlock_irqrestore(&t->lock, irqflags);

		while (n) {
			t = rb_entry(n, struct mmtimer, list);
			if (t->timer == timr)
				break;

			if (expires < t->timer->it.mmtimer.expires)
				n = n->rb_left;
			else
				n = n->rb_right;
		}

		if (!n) {
			spin_unlock_irqrestore(&timers[nodeid].lock, irqflags);
			return 0;
		}

		if (timers[nodeid].next == n) {
			timers[nodeid].next = rb_next(n);
			r = 1;
		}

		rb_erase(n, &timers[nodeid].timer_head);
		kfree(t);

		if (r) {
			mmtimer_disable_int(cnodeid_to_nasid(nodeid),
				COMPARATOR);
			mmtimer_set_next_timer(nodeid);
		}
	}
	spin_unlock_irqrestore(&timers[nodeid].lock, irqflags);
	return 0;
}

@@ -557,12 +669,11 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
	struct itimerspec * new_setting,
	struct itimerspec * old_setting)
{

	int i;
	unsigned long when, period, irqflags;
	int err = 0;
	cnodeid_t nodeid;
	mmtimer_t *base;
	struct mmtimer *base;
	struct rb_node *n;

	if (old_setting)
		sgi_timer_get(timr, old_setting);
@@ -575,6 +686,10 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
		/* Clear timer */
		return 0;

	base = kmalloc(sizeof(struct mmtimer), GFP_KERNEL);
	if (base == NULL)
		return -ENOMEM;

	if (flags & TIMER_ABSTIME) {
		struct timespec n;
		unsigned long now;
@@ -604,47 +719,38 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
	preempt_disable();

	nodeid =  cpu_to_node(smp_processor_id());
retry:
	/* Don't use an allocated timer, or a deleted one that's pending */
	for(i = 0; i< NUM_COMPARATORS; i++) {
		base = timers[nodeid] + i;
		if (!base->timer && !base->tasklet.state) {
			break;
		}
	}

	if (i == NUM_COMPARATORS) {
		preempt_enable();
		return -EBUSY;
	}

	spin_lock_irqsave(&base->lock, irqflags);
	/* Lock the node timer structure */
	spin_lock_irqsave(&timers[nodeid].lock, irqflags);

	if (base->timer || base->tasklet.state != 0) {
		spin_unlock_irqrestore(&base->lock, irqflags);
		goto retry;
	}
	base->timer = timr;
	base->cpu = smp_processor_id();

	timr->it.mmtimer.clock = i;
	timr->it.mmtimer.clock = TIMER_SET;
	timr->it.mmtimer.node = nodeid;
	timr->it.mmtimer.incr = period;
	timr->it.mmtimer.expires = when;

	if (period == 0) {
		if (!mmtimer_setup(i, when)) {
			mmtimer_disable_int(-1, i);
			posix_timer_event(timr, 0);
			timr->it.mmtimer.expires = 0;
		}
	} else {
		timr->it.mmtimer.expires -= period;
		if (reschedule_periodic_timer(base))
			err = -EINVAL;
	n = timers[nodeid].next;

	/* Add the new struct mmtimer to node's timer list */
	mmtimer_add_list(base);

	if (timers[nodeid].next == n) {
		/* No need to reprogram comparator for now */
		spin_unlock_irqrestore(&timers[nodeid].lock, irqflags);
		preempt_enable();
		return err;
	}

	spin_unlock_irqrestore(&base->lock, irqflags);
	/* We need to reprogram the comparator */
	if (n)
		mmtimer_disable_int(cnodeid_to_nasid(nodeid), COMPARATOR);

	mmtimer_set_next_timer(nodeid);

	/* Unlock the node timer structure */
	spin_unlock_irqrestore(&timers[nodeid].lock, irqflags);

	preempt_enable();

@@ -669,7 +775,6 @@ static struct k_clock sgi_clock = {
 */
static int __init mmtimer_init(void)
{
	unsigned i;
	cnodeid_t node, maxn = -1;

	if (!ia64_platform_is("sn2"))
@@ -706,31 +811,18 @@ static int __init mmtimer_init(void)
	maxn++;

	/* Allocate list of node ptrs to mmtimer_t's */
	timers = kzalloc(sizeof(mmtimer_t *)*maxn, GFP_KERNEL);
	timers = kzalloc(sizeof(struct mmtimer_node)*maxn, GFP_KERNEL);
	if (timers == NULL) {
		printk(KERN_ERR "%s: failed to allocate memory for device\n",
				MMTIMER_NAME);
		goto out3;
	}

	/* Allocate mmtimer_t's for each online node */
	/* Initialize struct mmtimer's for each online node */
	for_each_online_node(node) {
		timers[node] = kmalloc_node(sizeof(mmtimer_t)*NUM_COMPARATORS, GFP_KERNEL, node);
		if (timers[node] == NULL) {
			printk(KERN_ERR "%s: failed to allocate memory for device\n",
				MMTIMER_NAME);
			goto out4;
		}
		for (i=0; i< NUM_COMPARATORS; i++) {
			mmtimer_t * base = timers[node] + i;

			spin_lock_init(&base->lock);
			base->timer = NULL;
			base->cpu = 0;
			base->i = i;
			tasklet_init(&base->tasklet, mmtimer_tasklet,
				(unsigned long) (base));
		}
		spin_lock_init(&timers[node].lock);
		tasklet_init(&timers[node].tasklet, mmtimer_tasklet,
			(unsigned long) node);
	}

	sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second;
@@ -741,11 +833,8 @@ static int __init mmtimer_init(void)

	return 0;

out4:
	for_each_online_node(node) {
		kfree(timers[node]);
	}
out3:
	kfree(timers);
	misc_deregister(&mmtimer_miscdev);
out2:
	free_irq(SGI_MMTIMER_VECTOR, NULL);
@@ -754,4 +843,3 @@ static int __init mmtimer_init(void)
}

module_init(mmtimer_init);