Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0a5143f2 authored by David Howells's avatar David Howells
Browse files

afs: Implement VL server rotation



Track VL servers as independent entities rather than lumping all their
addresses together into one set and implement server-level rotation by:

 (1) Add the concept of a VL server list, where each server has its own
     separate address list.  This code is similar to the FS server list.

 (2) Use the DNS resolver to retrieve a set of servers and their associated
     addresses, ports, preference and weight ratings.

 (3) In the case of a legacy DNS resolver or an address list given directly
     through /proc/net/afs/cells, create a list containing just a dummy
     server record and attach all the addresses to that.

 (4) Implement a simple rotation policy, for the moment ignoring the
     priorities and weights assigned to the servers.

 (5) Show the address list through /proc/net/afs/<cell>/vlservers.  This
     also displays the source and status of the data as indicated by the
     upcall.

Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
parent e7f680f4
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ kafs-y := \
	super.o \
	netdevices.o \
	vlclient.o \
	vl_rotate.o \
	vl_list.o \
	volume.o \
	write.o \
	xattr.o
+84 −79
Original line number Diff line number Diff line
@@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
/*
 * Parse a text string consisting of delimited addresses.
 */
struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
					       const char *text, size_t len,
					       char delim,
					       unsigned short service,
					       unsigned short port)
{
	struct afs_vlserver_list *vllist;
	struct afs_addr_list *alist;
	const char *p, *end = text + len;
	const char *problem;
	unsigned int nr = 0;
	int ret = -ENOMEM;

	_enter("%*.*s,%c", (int)len, (int)len, text, delim);

	if (!len)
	if (!len) {
		_leave(" = -EDESTADDRREQ [empty]");
		return ERR_PTR(-EDESTADDRREQ);
	}

	if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
		delim = ',';
@@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
	/* Count the addresses */
	p = text;
	do {
		if (!*p)
			return ERR_PTR(-EINVAL);
		if (!*p) {
			problem = "nul";
			goto inval;
		}
		if (*p == delim)
			continue;
		nr++;
		if (*p == '[') {
			p++;
			if (p == end)
				return ERR_PTR(-EINVAL);
			if (p == end) {
				problem = "brace1";
				goto inval;
			}
			p = memchr(p, ']', end - p);
			if (!p)
				return ERR_PTR(-EINVAL);
			if (!p) {
				problem = "brace2";
				goto inval;
			}
			p++;
			if (p >= end)
				break;
@@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,

	_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);

	alist = afs_alloc_addrlist(nr, service, port);
	if (!alist)
	vllist = afs_alloc_vlserver_list(1);
	if (!vllist)
		return ERR_PTR(-ENOMEM);

	vllist->nr_servers = 1;
	vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
	if (!vllist->servers[0].server)
		goto error_vl;

	alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
	if (!alist)
		goto error;

	/* Extract the addresses */
	p = text;
	do {
@@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
					break;
		}

		if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
		if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
			family = AF_INET;
		else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
		} else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
			family = AF_INET6;
		else
		} else {
			problem = "family";
			goto bad_address;
		}

		if (stop != q)
		p = q;
		if (stop != p) {
			problem = "nostop";
			goto bad_address;
		}

		p = q;
		if (q < end && *q == ']')
			p++;

@@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
				/* Port number specification "+1234" */
				xport = 0;
				p++;
				if (p >= end || !isdigit(*p))
				if (p >= end || !isdigit(*p)) {
					problem = "port";
					goto bad_address;
				}
				do {
					xport *= 10;
					xport += *p - '0';
					if (xport > 65535)
					if (xport > 65535) {
						problem = "pval";
						goto bad_address;
					}
					p++;
				} while (p < end && isdigit(*p));
			} else if (*p == delim) {
				p++;
			} else {
				problem = "weird";
				goto bad_address;
			}
		}
@@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,

	} while (p < end);

	rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
	_leave(" = [nr %u]", alist->nr_addrs);
	return alist;
	return vllist;

bad_address:
	kfree(alist);
inval:
	_leave(" = -EINVAL [%s %zu %*.*s]",
	       problem, p - text, (int)len, (int)len, text);
	return ERR_PTR(-EINVAL);
bad_address:
	_leave(" = -EINVAL [%s %zu %*.*s]",
	       problem, p - text, (int)len, (int)len, text);
	ret = -EINVAL;
error:
	afs_put_addrlist(alist);
error_vl:
	afs_put_vlserverlist(net, vllist);
	return ERR_PTR(ret);
}

/*
@@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1,
/*
 * Perform a DNS query for VL servers and build a up an address list.
 */
struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
{
	struct afs_addr_list *alist;
	char *vllist = NULL;
	struct afs_vlserver_list *vllist;
	char *result = NULL;
	int ret;

	_enter("%s", cell->name);

	ret = dns_query("afsdb", cell->name, cell->name_len,
			"", &vllist, _expiry);
	if (ret < 0)
	ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
			&result, _expiry);
	if (ret < 0) {
		_leave(" = %d [dns]", ret);
		return ERR_PTR(ret);
	}

	if (*_expiry == 0)
		*_expiry = ktime_get_real_seconds() + 60;

	alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
	if (ret > 1 && result[0] == 0)
		vllist = afs_extract_vlserver_list(cell, result, ret);
	else
		vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
					      VL_SERVICE, AFS_VL_PORT);
	if (IS_ERR(alist)) {
		kfree(vllist);
		if (alist != ERR_PTR(-ENOMEM))
			pr_err("Failed to parse DNS data\n");
		return alist;
	}
	kfree(result);
	if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
		pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));

	kfree(vllist);
	return alist;
	return vllist;
}

/*
@@ -347,43 +392,3 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
	ac->begun = false;
	return ac->error;
}

/*
 * Set the address cursor for iterating over VL servers.
 */
int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
{
	struct afs_addr_list *alist;
	int ret;

	if (!rcu_access_pointer(cell->vl_addrs)) {
		ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
				  TASK_INTERRUPTIBLE);
		if (ret < 0)
			return ret;

		if (!rcu_access_pointer(cell->vl_addrs) &&
		    ktime_get_real_seconds() < cell->dns_expiry)
			return cell->error;
	}

	read_lock(&cell->vl_addrs_lock);
	alist = rcu_dereference_protected(cell->vl_addrs,
					  lockdep_is_held(&cell->vl_addrs_lock));
	if (alist->nr_addrs > 0)
		afs_get_addrlist(alist);
	else
		alist = NULL;
	read_unlock(&cell->vl_addrs_lock);

	if (!alist)
		return -EDESTADDRREQ;

	ac->alist = alist;
	ac->addr = NULL;
	ac->start = READ_ONCE(alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;
	return 0;
}
+20 −19
Original line number Diff line number Diff line
@@ -119,7 +119,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
 */
static struct afs_cell *afs_alloc_cell(struct afs_net *net,
				       const char *name, unsigned int namelen,
				       const char *vllist)
				       const char *addresses)
{
	struct afs_cell *cell;
	int i, ret;
@@ -134,7 +134,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
	if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
		return ERR_PTR(-EINVAL);

	_enter("%*.*s,%s", namelen, namelen, name, vllist);
	_enter("%*.*s,%s", namelen, namelen, name, addresses);

	cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
	if (!cell) {
@@ -153,22 +153,23 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
		       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
	INIT_LIST_HEAD(&cell->proc_volumes);
	rwlock_init(&cell->proc_lock);
	rwlock_init(&cell->vl_addrs_lock);
	rwlock_init(&cell->vl_servers_lock);

	/* Fill in the VL server list if we were given a list of addresses to
	 * use.
	 */
	if (vllist) {
		struct afs_addr_list *alist;
	if (addresses) {
		struct afs_vlserver_list *vllist;

		alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
		vllist = afs_parse_text_addrs(net,
					      addresses, strlen(addresses), ':',
					      VL_SERVICE, AFS_VL_PORT);
		if (IS_ERR(alist)) {
			ret = PTR_ERR(alist);
		if (IS_ERR(vllist)) {
			ret = PTR_ERR(vllist);
			goto parse_failed;
		}

		rcu_assign_pointer(cell->vl_addrs, alist);
		rcu_assign_pointer(cell->vl_servers, vllist);
		cell->dns_expiry = TIME64_MAX;
	}

@@ -356,14 +357,14 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
 */
static void afs_update_cell(struct afs_cell *cell)
{
	struct afs_addr_list *alist, *old;
	struct afs_vlserver_list *vllist, *old;
	time64_t now, expiry;

	_enter("%s", cell->name);

	alist = afs_dns_query(cell, &expiry);
	if (IS_ERR(alist)) {
		switch (PTR_ERR(alist)) {
	vllist = afs_dns_query(cell, &expiry);
	if (IS_ERR(vllist)) {
		switch (PTR_ERR(vllist)) {
		case -ENODATA:
			/* The DNS said that the cell does not exist */
			set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
@@ -387,12 +388,12 @@ static void afs_update_cell(struct afs_cell *cell)
		/* Exclusion on changing vl_addrs is achieved by a
		 * non-reentrant work item.
		 */
		old = rcu_dereference_protected(cell->vl_addrs, true);
		rcu_assign_pointer(cell->vl_addrs, alist);
		old = rcu_dereference_protected(cell->vl_servers, true);
		rcu_assign_pointer(cell->vl_servers, vllist);
		cell->dns_expiry = expiry;

		if (old)
			afs_put_addrlist(old);
			afs_put_vlserverlist(cell->net, old);
	}

	if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
@@ -414,7 +415,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)

	ASSERTCMP(atomic_read(&cell->usage), ==, 0);

	afs_put_addrlist(rcu_access_pointer(cell->vl_addrs));
	afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
	key_put(cell->anonymous_key);
	kfree(cell);

+1 −1
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
		return 0;
	}

	ret = dns_query("afsdb", name, len, "", NULL, NULL);
	ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
	if (ret == -ENODATA)
		ret = -EDESTADDRREQ;
	return ret;
+101 −13
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include <linux/backing-dev.h>
#include <linux/uuid.h>
#include <linux/mm_types.h>
#include <linux/dns_resolver.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -77,6 +78,8 @@ struct afs_addr_list {
	unsigned char		nr_addrs;
	unsigned char		index;		/* Address currently in use */
	unsigned char		nr_ipv4;	/* Number of IPv4 addresses */
	enum dns_record_source	source:8;
	enum dns_lookup_status	status:8;
	unsigned long		probed;		/* Mask of servers that have been probed */
	unsigned long		yfs;		/* Mask of servers that are YFS */
	struct sockaddr_rxrpc	addrs[];
@@ -355,12 +358,51 @@ struct afs_cell {
	rwlock_t		proc_lock;

	/* VL server list. */
	rwlock_t		vl_addrs_lock;	/* Lock on vl_addrs */
	struct afs_addr_list	__rcu *vl_addrs; /* List of VL servers */
	rwlock_t		vl_servers_lock; /* Lock on vl_servers */
	struct afs_vlserver_list __rcu *vl_servers;

	u8			name_len;	/* Length of name */
	char			name[64 + 1];	/* Cell name, case-flattened and NUL-padded */
};

/*
 * Volume Location server record.
 */
struct afs_vlserver {
	struct rcu_head		rcu;
	struct afs_addr_list	__rcu *addresses; /* List of addresses for this VL server */
	unsigned long		flags;
#define AFS_VLSERVER_FL_PROBED	0		/* The VL server has been probed */
#define AFS_VLSERVER_FL_PROBING	1		/* VL server is being probed */
	rwlock_t		lock;		/* Lock on addresses */
	atomic_t		usage;
	u16			name_len;	/* Length of name */
	u16			port;
	char			name[];		/* Server name, case-flattened */
};

/*
 * Weighted list of Volume Location servers.
 */
struct afs_vlserver_entry {
	u16			priority;	/* Preference (as SRV) */
	u16			weight;		/* Weight (as SRV) */
	enum dns_record_source	source:8;
	enum dns_lookup_status	status:8;
	struct afs_vlserver	*server;
};

struct afs_vlserver_list {
	struct rcu_head		rcu;
	atomic_t		usage;
	u8			nr_servers;
	u8			index;		/* Server currently in use */
	enum dns_record_source	source:8;
	enum dns_lookup_status	status:8;
	rwlock_t		lock;
	struct afs_vlserver_entry servers[];
};

/*
 * Cached VLDB entry.
 *
@@ -616,6 +658,23 @@ struct afs_addr_cursor {
	bool			responded;	/* T if the current address responded */
};

/*
 * Cursor for iterating over a set of volume location servers.
 */
struct afs_vl_cursor {
	struct afs_addr_cursor	ac;
	struct afs_cell		*cell;		/* The cell we're querying */
	struct afs_vlserver_list *server_list;	/* Current server list (pins ref) */
	struct key		*key;		/* Key for the server */
	unsigned char		start;		/* Initial index in server list */
	unsigned char		index;		/* Number of servers tried beyond start */
	short			error;
	unsigned short		flags;
#define AFS_VL_CURSOR_STOP	0x0001		/* Set to cease iteration */
#define AFS_VL_CURSOR_RETRY	0x0002		/* Set to do a retry */
#define AFS_VL_CURSOR_RETRIED	0x0004		/* Set if started a retry */
};

/*
 * Cursor for iterating over a set of fileservers.
 */
@@ -662,12 +721,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
						unsigned short,
						unsigned short);
extern void afs_put_addrlist(struct afs_addr_list *);
extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
						      const char *, size_t, char,
						      unsigned short, unsigned short);
extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
extern bool afs_iterate_addresses(struct afs_addr_cursor *);
extern int afs_end_cursor(struct afs_addr_cursor *);
extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);

extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
@@ -1088,14 +1147,43 @@ extern void afs_fs_exit(void);
/*
 * vlclient.c
 */
extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
							 struct afs_addr_cursor *,
							 struct key *, const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
						struct key *, const uuid_t *);
extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
							 const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
						     struct key *, const uuid_t *);
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);

/*
 * vl_rotate.c
 */
extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
					 struct afs_cell *, struct key *);
extern bool afs_select_vlserver(struct afs_vl_cursor *);
extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
extern int afs_end_vlserver_operation(struct afs_vl_cursor *);

/*
 * vlserver_list.c
 */
static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
{
	atomic_inc(&vlserver->usage);
	return vlserver;
}

static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
{
	if (vllist)
		atomic_inc(&vllist->usage);
	return vllist;
}

extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
							   const void *, size_t);

/*
 * volume.c
Loading