Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 36bdfc8b authored by Greg Banks's avatar Greg Banks Committed by Linus Torvalds
Browse files

[PATCH] knfsd: move tempsock aging to a timer



Following are 11 patches from Greg Banks which combine to make knfsd more
Numa-aware.  They reduce hitting on 'global' data structures, and create some
data-structures that can be node-local.

knfsd threads are bound to a particular node, and the thread to handle a new
request is chosen from the threads that are attach to the node that received
the interrupt.

The distribution of threads across nodes can be controlled by a new file in
the 'nfsd' filesystem, though the default approach of an even spread is
probably fine for most sites.

Some (old) numbers that show the efficacy of these patches: N == number of
NICs == number of CPUs == nmber of clients.  Number of NUMA nodes == N/2

N	Throughput, MiB/s	CPU usage, % (max=N*100)
	Before	After		Before	After
	---	------	----		-----	-----
	4	312	435		350	228
	6	500	656		501	418
	8	562	804		690	589

This patch:

Move the aging of RPC/TCP connection sockets from the main svc_recv() loop to
a timer which uses a mark-and-sweep algorithm every 6 minutes.  This reduces
the amount of work that needs to be done in the main RPC loop and the length
of time we need to hold the (effectively global) svc_serv->sv_lock.

[akpm@osdl.org: cleanup]
Signed-off-by: default avatarGreg Banks <gnb@melbourne.sgi.com>
Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 4a3ae42d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ struct svc_serv {
	struct list_head	sv_permsocks;	/* all permanent sockets */
	struct list_head	sv_tempsocks;	/* all temporary sockets */
	int			sv_tmpcnt;	/* count of temporary sockets */
	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */

	char *			sv_name;	/* service name */

+2 −0
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ struct svc_sock {
#define	SK_DEAD		6			/* socket closed */
#define	SK_CHNGBUF	7			/* need to change snd/rcv buffer sizes */
#define	SK_DEFERRED	8			/* request on sk_deferred */
#define	SK_OLD		9			/* used for temp socket aging mark+sweep */
#define	SK_DETACHED	10			/* detached from tempsocks list */

	int			sk_reserved;	/* space on outq that is reserved */

+3 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
	INIT_LIST_HEAD(&serv->sv_sockets);
	INIT_LIST_HEAD(&serv->sv_tempsocks);
	INIT_LIST_HEAD(&serv->sv_permsocks);
	init_timer(&serv->sv_temptimer);
	spin_lock_init(&serv->sv_lock);

	/* Remove any stale portmap registrations */
@@ -87,6 +88,8 @@ svc_destroy(struct svc_serv *serv)
	} else
		printk("svc_destroy: no threads for serv=%p!\n", serv);

	del_timer_sync(&serv->sv_temptimer);

	while (!list_empty(&serv->sv_tempsocks)) {
		svsk = list_entry(serv->sv_tempsocks.next,
				  struct svc_sock,
+70 −26
Original line number Diff line number Diff line
@@ -74,6 +74,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);

/* apparently the "standard" is that clients close
 * idle connections after 5 minutes, servers after
 * 6 minutes
 *   http://www.connectathon.org/talks96/nfstcp.pdf
 */
static int svc_conn_age_period = 6*60;

/*
 * Queue up an idle server thread.  Must have serv->sv_lock held.
 * Note: this is really a stack rather than a queue, so that we only
@@ -1220,24 +1227,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
		return -EINTR;

	spin_lock_bh(&serv->sv_lock);
	if (!list_empty(&serv->sv_tempsocks)) {
		svsk = list_entry(serv->sv_tempsocks.next,
				  struct svc_sock, sk_list);
		/* apparently the "standard" is that clients close
		 * idle connections after 5 minutes, servers after
		 * 6 minutes
		 *   http://www.connectathon.org/talks96/nfstcp.pdf 
		 */
		if (get_seconds() - svsk->sk_lastrecv < 6*60
		    || test_bit(SK_BUSY, &svsk->sk_flags))
			svsk = NULL;
	}
	if (svsk) {
		set_bit(SK_BUSY, &svsk->sk_flags);
		set_bit(SK_CLOSE, &svsk->sk_flags);
		rqstp->rq_sock = svsk;
		svsk->sk_inuse++;
	} else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
	if ((svsk = svc_sock_dequeue(serv)) != NULL) {
		rqstp->rq_sock = svsk;
		svsk->sk_inuse++;
		rqstp->rq_reserved = serv->sv_bufsz;	
@@ -1282,13 +1272,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
		return -EAGAIN;
	}
	svsk->sk_lastrecv = get_seconds();
	if (test_bit(SK_TEMP, &svsk->sk_flags)) {
		/* push active sockets to end of list */
		spin_lock_bh(&serv->sv_lock);
		if (!list_empty(&svsk->sk_list))
			list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
		spin_unlock_bh(&serv->sv_lock);
	}
	clear_bit(SK_OLD, &svsk->sk_flags);

	rqstp->rq_secure  = ntohs(rqstp->rq_addr.sin_port) < 1024;
	rqstp->rq_chandle.defer = svc_defer;
@@ -1347,6 +1331,58 @@ svc_send(struct svc_rqst *rqstp)
	return len;
}

/*
 * Timer function to close old temporary sockets, using
 * a mark-and-sweep algorithm.
 */
static void
svc_age_temp_sockets(unsigned long closure)
{
	struct svc_serv *serv = (struct svc_serv *)closure;
	struct svc_sock *svsk;
	struct list_head *le, *next;
	LIST_HEAD(to_be_aged);

	dprintk("svc_age_temp_sockets\n");

	if (!spin_trylock_bh(&serv->sv_lock)) {
		/* busy, try again 1 sec later */
		dprintk("svc_age_temp_sockets: busy\n");
		mod_timer(&serv->sv_temptimer, jiffies + HZ);
		return;
	}

	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		svsk = list_entry(le, struct svc_sock, sk_list);

		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
			continue;
		if (svsk->sk_inuse || test_bit(SK_BUSY, &svsk->sk_flags))
			continue;
		svsk->sk_inuse++;
		list_move(le, &to_be_aged);
		set_bit(SK_CLOSE, &svsk->sk_flags);
		set_bit(SK_DETACHED, &svsk->sk_flags);
	}
	spin_unlock_bh(&serv->sv_lock);

	while (!list_empty(&to_be_aged)) {
		le = to_be_aged.next;
		/* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
		list_del_init(le);
		svsk = list_entry(le, struct svc_sock, sk_list);

		dprintk("queuing svsk %p for closing, %lu seconds old\n",
			svsk, get_seconds() - svsk->sk_lastrecv);

		/* a thread will dequeue and close it soon */
		svc_sock_enqueue(svsk);
		svc_sock_put(svsk);
	}

	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}

/*
 * Initialize socket for RPC use and create svc_sock struct
 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
@@ -1400,6 +1436,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
		set_bit(SK_TEMP, &svsk->sk_flags);
		list_add(&svsk->sk_list, &serv->sv_tempsocks);
		serv->sv_tmpcnt++;
		if (serv->sv_temptimer.function == NULL) {
			/* setup timer to age temp sockets */
			setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
					(unsigned long)serv);
			mod_timer(&serv->sv_temptimer,
					jiffies + svc_conn_age_period * HZ);
		}
	} else {
		clear_bit(SK_TEMP, &svsk->sk_flags);
		list_add(&svsk->sk_list, &serv->sv_permsocks);
@@ -1513,6 +1556,7 @@ svc_delete_socket(struct svc_sock *svsk)

	spin_lock_bh(&serv->sv_lock);

	if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
		list_del_init(&svsk->sk_list);
	list_del_init(&svsk->sk_ready);
	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))