Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ceec4c38 authored by Julian Anastasov's avatar Julian Anastasov Committed by Pablo Neira Ayuso
Browse files

ipvs: convert services to rcu



This is the final step in RCU conversion.

Things that are removed:

- svc->usecnt: now svc is accessed under RCU read lock
- svc->inc: and some unused code
- ip_vs_bind_pe and ip_vs_unbind_pe: no ability to replace PE
- __ip_vs_svc_lock: replaced with RCU
- IP_VS_WAIT_WHILE: now readers lookup svcs and dests under
	RCU and work in parallel with configuration

Other changes:

- before now, an RCU read-side critical section included only the
calling of the schedule method; now it is extended to include the
service lookup
- ip_vs_svc_table and ip_vs_svc_fwm_table are now using hlist
- svc->pe and svc->scheduler remain to the end (of grace period),
	the schedulers are prepared for such RCU readers
	even after done_service is called but they need
	to use synchronize_rcu because last ip_vs_scheduler_put
	can happen while RCU read-side critical sections
	use an outdated svc->scheduler pointer
- as planned, update_service is removed
- empty services can be freed immediately after grace period.
	If dests were present, the services are freed from
	the dest trash code

Signed-off-by: default avatarJulian Anastasov <ja@ssi.bg>
Signed-off-by: default avatarSimon Horman <horms@verge.net.au>
parent 413c2d04
Loading
Loading
Loading
Loading
+9 −19
Original line number Original line Diff line number Diff line
@@ -359,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
#define LeaveFunction(level)   do {} while (0)
#define LeaveFunction(level)   do {} while (0)
#endif
#endif


#define	IP_VS_WAIT_WHILE(expr)	while (expr) { cpu_relax(); }



/*
/*
 *      The port number of FTP service (in network order).
 *      The port number of FTP service (in network order).
@@ -712,10 +710,9 @@ struct ip_vs_dest_user_kern {
 *	and the forwarding entries
 *	and the forwarding entries
 */
 */
struct ip_vs_service {
struct ip_vs_service {
	struct list_head	s_list;   /* for normal service table */
	struct hlist_node	s_list;   /* for normal service table */
	struct list_head	f_list;   /* for fwmark-based service table */
	struct hlist_node	f_list;   /* for fwmark-based service table */
	atomic_t		refcnt;   /* reference counter */
	atomic_t		refcnt;   /* reference counter */
	atomic_t		usecnt;   /* use counter */


	u16			af;       /* address family */
	u16			af;       /* address family */
	__u16			protocol; /* which protocol (TCP/UDP) */
	__u16			protocol; /* which protocol (TCP/UDP) */
@@ -730,15 +727,16 @@ struct ip_vs_service {
	struct list_head	destinations;  /* real server d-linked list */
	struct list_head	destinations;  /* real server d-linked list */
	__u32			num_dests;     /* number of servers */
	__u32			num_dests;     /* number of servers */
	struct ip_vs_stats      stats;         /* statistics for the service */
	struct ip_vs_stats      stats;         /* statistics for the service */
	struct ip_vs_app	*inc;	  /* bind conns to this app inc */


	/* for scheduling */
	/* for scheduling */
	struct ip_vs_scheduler	*scheduler;    /* bound scheduler object */
	struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
	spinlock_t		sched_lock;    /* lock sched_data */
	spinlock_t		sched_lock;    /* lock sched_data */
	void			*sched_data;   /* scheduler application data */
	void			*sched_data;   /* scheduler application data */


	/* alternate persistence engine */
	/* alternate persistence engine */
	struct ip_vs_pe		*pe;
	struct ip_vs_pe __rcu	*pe;

	struct rcu_head		rcu_head;
};
};


/* Information for cached dst */
/* Information for cached dst */
@@ -807,8 +805,6 @@ struct ip_vs_scheduler {
	int (*init_service)(struct ip_vs_service *svc);
	int (*init_service)(struct ip_vs_service *svc);
	/* scheduling service finish */
	/* scheduling service finish */
	void (*done_service)(struct ip_vs_service *svc);
	void (*done_service)(struct ip_vs_service *svc);
	/* scheduler updating service */
	int (*update_service)(struct ip_vs_service *svc);
	/* dest is linked */
	/* dest is linked */
	int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is unlinked */
	/* dest is unlinked */
@@ -1344,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);


void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
@@ -1392,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
				struct ip_vs_scheduler *scheduler);
				struct ip_vs_scheduler *scheduler);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
				   struct ip_vs_scheduler *sched);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
extern struct ip_vs_conn *
@@ -1412,14 +1407,9 @@ extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;
extern int sysctl_ip_vs_sync_ver;


extern struct ip_vs_service *
extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport);
		  const union nf_inet_addr *vaddr, __be16 vport);


static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
	atomic_dec(&svc->usecnt);
}

extern bool
extern bool
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
		       const union nf_inet_addr *daddr, __be16 dport);
		       const union nf_inet_addr *daddr, __be16 dport);
+9 −17
Original line number Original line Diff line number Diff line
@@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
{
{
	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
			      vport, p);
			      vport, p);
	p->pe = svc->pe;
	p->pe = rcu_dereference(svc->pe);
	if (p->pe && p->pe->fill_param)
	if (p->pe && p->pe->fill_param)
		return p->pe->fill_param(p, skb);
		return p->pe->fill_param(p, skb);


@@ -296,15 +296,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
	/* Check if a template already exists */
	/* Check if a template already exists */
	ct = ip_vs_ct_in_get(&param);
	ct = ip_vs_ct_in_get(&param);
	if (!ct || !ip_vs_check_template(ct)) {
	if (!ct || !ip_vs_check_template(ct)) {
		struct ip_vs_scheduler *sched;

		/*
		/*
		 * No template found or the dest of the connection
		 * No template found or the dest of the connection
		 * template is not available.
		 * template is not available.
		 * return *ignored=0 i.e. ICMP and NF_DROP
		 * return *ignored=0 i.e. ICMP and NF_DROP
		 */
		 */
		rcu_read_lock();
		sched = rcu_dereference(svc->scheduler);
		dest = svc->scheduler->schedule(svc, skb);
		dest = sched->schedule(svc, skb);
		if (!dest) {
		if (!dest) {
			rcu_read_unlock();
			IP_VS_DBG(1, "p-schedule: no dest found.\n");
			IP_VS_DBG(1, "p-schedule: no dest found.\n");
			kfree(param.pe_data);
			kfree(param.pe_data);
			*ignored = 0;
			*ignored = 0;
@@ -320,7 +321,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
		 * when the template expires */
		 * when the template expires */
		ct = ip_vs_conn_new(&param, &dest->addr, dport,
		ct = ip_vs_conn_new(&param, &dest->addr, dport,
				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
		rcu_read_unlock();
		if (ct == NULL) {
		if (ct == NULL) {
			kfree(param.pe_data);
			kfree(param.pe_data);
			*ignored = -1;
			*ignored = -1;
@@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
{
	struct ip_vs_protocol *pp = pd->pp;
	struct ip_vs_protocol *pp = pd->pp;
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_scheduler *sched;
	struct ip_vs_dest *dest;
	struct ip_vs_dest *dest;
	__be16 _ports[2], *pptr;
	__be16 _ports[2], *pptr;
	unsigned int flags;
	unsigned int flags;
@@ -449,10 +450,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
		return NULL;
		return NULL;
	}
	}


	rcu_read_lock();
	sched = rcu_dereference(svc->scheduler);
	dest = svc->scheduler->schedule(svc, skb);
	dest = sched->schedule(svc, skb);
	if (dest == NULL) {
	if (dest == NULL) {
		rcu_read_unlock();
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
		return NULL;
	}
	}
@@ -473,7 +473,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
		cp = ip_vs_conn_new(&p, &dest->addr,
		cp = ip_vs_conn_new(&p, &dest->addr,
				    dest->port ? dest->port : pptr[1],
				    dest->port ? dest->port : pptr[1],
				    flags, dest, skb->mark);
				    flags, dest, skb->mark);
		rcu_read_unlock();
		if (!cp) {
		if (!cp) {
			*ignored = -1;
			*ignored = -1;
			return NULL;
			return NULL;
@@ -510,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,


	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
	if (pptr == NULL) {
	if (pptr == NULL) {
		ip_vs_service_put(svc);
		return NF_DROP;
		return NF_DROP;
	}
	}


@@ -536,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
				      IP_VS_CONN_F_ONE_PACKET : 0;
				      IP_VS_CONN_F_ONE_PACKET : 0;
		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };


		ip_vs_service_put(svc);

		/* create a new connection entry */
		/* create a new connection entry */
		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
		{
		{
@@ -574,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
	 * listed in the ipvs table), pass the packets, because it is
	 * listed in the ipvs table), pass the packets, because it is
	 * not ipvs job to decide to drop the packets.
	 * not ipvs job to decide to drop the packets.
	 */
	 */
	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
		ip_vs_service_put(svc);
		return NF_ACCEPT;
		return NF_ACCEPT;
	}

	ip_vs_service_put(svc);


	/*
	/*
	 * Notify the client that the destination is unreachable, and
	 * Notify the client that the destination is unreachable, and
+123 −176
Original line number Original line Diff line number Diff line
@@ -55,9 +55,6 @@
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
static DEFINE_MUTEX(__ip_vs_mutex);


/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* sysctl variables */
/* sysctl variables */


#ifdef CONFIG_IP_VS_DEBUG
#ifdef CONFIG_IP_VS_DEBUG
@@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)


/* the service table hashed by <protocol, addr, port> */
/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];




/*
/*
@@ -314,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
		 */
		 */
		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
					 &svc->addr, svc->port);
					 &svc->addr, svc->port);
		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
		hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
	} else {
	} else {
		/*
		/*
		 *  Hash it by fwmark in svc_fwm_table
		 *  Hash it by fwmark in svc_fwm_table
		 */
		 */
		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
		hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
	}
	}


	svc->flags |= IP_VS_SVC_F_HASHED;
	svc->flags |= IP_VS_SVC_F_HASHED;
@@ -344,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)


	if (svc->fwmark == 0) {
	if (svc->fwmark == 0) {
		/* Remove it from the svc_table table */
		/* Remove it from the svc_table table */
		list_del(&svc->s_list);
		hlist_del_rcu(&svc->s_list);
	} else {
	} else {
		/* Remove it from the svc_fwm_table table */
		/* Remove it from the svc_fwm_table table */
		list_del(&svc->f_list);
		hlist_del_rcu(&svc->f_list);
	}
	}


	svc->flags &= ~IP_VS_SVC_F_HASHED;
	svc->flags &= ~IP_VS_SVC_F_HASHED;
@@ -369,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
	/* Check for "full" addressed entries */
	/* Check for "full" addressed entries */
	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);


	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
	hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
		if ((svc->af == af)
		if ((svc->af == af)
		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
		    && (svc->port == vport)
		    && (svc->port == vport)
@@ -396,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
	/* Check for fwmark addressed entries */
	/* Check for fwmark addressed entries */
	hash = ip_vs_svc_fwm_hashkey(net, fwmark);
	hash = ip_vs_svc_fwm_hashkey(net, fwmark);


	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
	hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
		if (svc->fwmark == fwmark && svc->af == af
		if (svc->fwmark == fwmark && svc->af == af
		    && net_eq(svc->net, net)) {
		    && net_eq(svc->net, net)) {
			/* HIT */
			/* HIT */
@@ -407,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
	return NULL;
	return NULL;
}
}


/* Find service, called under RCU lock */
struct ip_vs_service *
struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
		   const union nf_inet_addr *vaddr, __be16 vport)
		   const union nf_inet_addr *vaddr, __be16 vport)
{
{
	struct ip_vs_service *svc;
	struct ip_vs_service *svc;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct netns_ipvs *ipvs = net_ipvs(net);


	read_lock(&__ip_vs_svc_lock);

	/*
	/*
	 *	Check the table hashed by fwmark first
	 *	Check the table hashed by fwmark first
	 */
	 */
@@ -451,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
	}
	}


  out:
  out:
	if (svc)
		atomic_inc(&svc->usecnt);
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
@@ -471,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
	dest->svc = svc;
	dest->svc = svc;
}
}


/* Free an IPVS service: release its per-CPU stats (if they were
 * allocated) and then the structure itself.
 * NOTE(review): callers must guarantee no RCU readers can still reach
 * the service — presumably invoked either before the service was ever
 * hashed, or from the RCU callback after a grace period; confirm at
 * call sites.
 */
static void ip_vs_service_free(struct ip_vs_service *svc)
{
	if (svc->stats.cpustats)
		free_percpu(svc->stats.cpustats);
	kfree(svc);
}

static void
static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
{
@@ -478,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)


	dest->svc = NULL;
	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt)) {
	if (atomic_dec_and_test(&svc->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
			      svc->fwmark,
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port), atomic_read(&svc->usecnt));
			      ntohs(svc->port));
		free_percpu(svc->stats.cpustats);
		ip_vs_service_free(svc);
		kfree(svc);
	}
	}
}
}


@@ -608,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
	struct ip_vs_service *svc;
	struct ip_vs_service *svc;
	__be16 port = dport;
	__be16 port = dport;


	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
	svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
	if (!svc)
	if (!svc)
		return NULL;
		return NULL;
	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -616,7 +614,6 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
	dest = ip_vs_lookup_dest(svc, daddr, port);
	dest = ip_vs_lookup_dest(svc, daddr, port);
	if (!dest)
	if (!dest)
		dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
		dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
	ip_vs_service_put(svc);
	return dest;
	return dest;
}
}


@@ -774,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		    struct ip_vs_dest_user_kern *udest, int add)
		    struct ip_vs_dest_user_kern *udest, int add)
{
{
	struct netns_ipvs *ipvs = net_ipvs(svc->net);
	struct netns_ipvs *ipvs = net_ipvs(svc->net);
	struct ip_vs_scheduler *sched;
	int conn_flags;
	int conn_flags;


	/* set the weight and the flags */
	/* set the weight and the flags */
@@ -816,29 +814,17 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
	__ip_vs_dst_cache_reset(dest);
	__ip_vs_dst_cache_reset(dest);
	spin_unlock_bh(&dest->dst_lock);
	spin_unlock_bh(&dest->dst_lock);


	if (add)
	sched = rcu_dereference_protected(svc->scheduler, 1);
		ip_vs_start_estimator(svc->net, &dest->stats);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	if (add) {
	if (add) {
		ip_vs_start_estimator(svc->net, &dest->stats);
		list_add_rcu(&dest->n_list, &svc->destinations);
		list_add_rcu(&dest->n_list, &svc->destinations);
		svc->num_dests++;
		svc->num_dests++;
		if (svc->scheduler->add_dest)
		if (sched->add_dest)
			svc->scheduler->add_dest(svc, dest);
			sched->add_dest(svc, dest);
	} else {
	} else {
		if (svc->scheduler->upd_dest)
		if (sched->upd_dest)
			svc->scheduler->upd_dest(svc, dest);
			sched->upd_dest(svc, dest);
	}
	}

	/* call the update_service, because server weight may be changed */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
}




@@ -1071,14 +1057,13 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
	list_del_rcu(&dest->n_list);
	list_del_rcu(&dest->n_list);
	svc->num_dests--;
	svc->num_dests--;


	if (svcupd && svc->scheduler->del_dest)
	if (svcupd) {
		svc->scheduler->del_dest(svc, dest);
		struct ip_vs_scheduler *sched;


	/*
		sched = rcu_dereference_protected(svc->scheduler, 1);
	 *  Call the update_service function of its scheduler
		if (sched->del_dest)
	 */
			sched->del_dest(svc, dest);
	if (svcupd && svc->scheduler->update_service)
	}
			svc->scheduler->update_service(svc);
}
}




@@ -1103,20 +1088,11 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
		return -ENOENT;
		return -ENOENT;
	}
	}


	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 *	Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	/*
	 *	Unlink dest from the service
	 *	Unlink dest from the service
	 */
	 */
	__ip_vs_unlink_dest(svc, dest, 1);
	__ip_vs_unlink_dest(svc, dest, 1);


	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	/*
	 *	Delete the destination
	 *	Delete the destination
	 */
	 */
@@ -1207,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
	}
	}


	/* I'm the first user of the service */
	/* I'm the first user of the service */
	atomic_set(&svc->usecnt, 0);
	atomic_set(&svc->refcnt, 0);
	atomic_set(&svc->refcnt, 0);


	svc->af = u->af;
	svc->af = u->af;
@@ -1231,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
	sched = NULL;
	sched = NULL;


	/* Bind the ct retriever */
	/* Bind the ct retriever */
	ip_vs_bind_pe(svc, pe);
	RCU_INIT_POINTER(svc->pe, pe);
	pe = NULL;
	pe = NULL;


	/* Update the virtual service counters */
	/* Update the virtual service counters */
@@ -1247,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
		ipvs->num_services++;
		ipvs->num_services++;


	/* Hash the service into the service table */
	/* Hash the service into the service table */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);


	*svc_p = svc;
	*svc_p = svc;
	/* Now there is a service - full throttle */
	/* Now there is a service - full throttle */
@@ -1259,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,


 out_err:
 out_err:
	if (svc != NULL) {
	if (svc != NULL) {
		ip_vs_unbind_scheduler(svc);
		ip_vs_unbind_scheduler(svc, sched);
		if (svc->inc) {
		ip_vs_service_free(svc);
			local_bh_disable();
			ip_vs_app_inc_put(svc->inc);
			local_bh_enable();
		}
		if (svc->stats.cpustats)
			free_percpu(svc->stats.cpustats);
		kfree(svc);
	}
	}
	ip_vs_scheduler_put(sched);
	ip_vs_scheduler_put(sched);
	ip_vs_pe_put(pe);
	ip_vs_pe_put(pe);
@@ -1317,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
	}
	}
#endif
#endif


	write_lock_bh(&__ip_vs_svc_lock);
	old_sched = rcu_dereference_protected(svc->scheduler, 1);

	if (sched != old_sched) {
	/*
		/* Bind the new scheduler */
	 * Wait until all other svc users go away.
		ret = ip_vs_bind_scheduler(svc, sched);
	 */
		if (ret) {
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			old_sched = sched;
			goto out;
		}
		/* Unbind the old scheduler on success */
		ip_vs_unbind_scheduler(svc, old_sched);
	}


	/*
	/*
	 * Set the flags and timeout value
	 * Set the flags and timeout value
@@ -1331,47 +1302,23 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
	svc->timeout = u->timeout * HZ;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;
	svc->netmask = u->netmask;


	old_sched = svc->scheduler;
	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (sched != old_sched) {
	if (pe != old_pe)
		/*
		rcu_assign_pointer(svc->pe, pe);
		 * Unbind the old scheduler
		 */
		ip_vs_unbind_scheduler(svc);

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

	old_pe = svc->pe;
	if (pe != old_pe) {
		ip_vs_unbind_pe(svc);
		ip_vs_bind_pe(svc, pe);
	}


out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
out:
out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	ip_vs_pe_put(old_pe);
	return ret;
	return ret;
}
}


/* RCU callback (scheduled via call_rcu on svc->rcu_head): recover the
 * enclosing ip_vs_service from its embedded rcu_head and free it, now
 * that the grace period has elapsed and no readers can hold a
 * reference.
 */
static void ip_vs_service_rcu_free(struct rcu_head *head)
{
	struct ip_vs_service *svc;

	svc = container_of(head, struct ip_vs_service, rcu_head);
	ip_vs_service_free(svc);
}


/*
/*
 *	Delete a service from the service list
 *	Delete a service from the service list
@@ -1394,21 +1341,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
	ip_vs_stop_estimator(svc->net, &svc->stats);
	ip_vs_stop_estimator(svc->net, &svc->stats);


	/* Unbind scheduler */
	/* Unbind scheduler */
	old_sched = svc->scheduler;
	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	ip_vs_unbind_scheduler(svc);
	ip_vs_unbind_scheduler(svc, old_sched);
	ip_vs_scheduler_put(old_sched);
	ip_vs_scheduler_put(old_sched);


	/* Unbind persistence engine */
	/* Unbind persistence engine, keep svc->pe */
	old_pe = svc->pe;
	old_pe = rcu_dereference_protected(svc->pe, 1);
	ip_vs_unbind_pe(svc);
	ip_vs_pe_put(old_pe);
	ip_vs_pe_put(old_pe);


	/* Unbind app inc */
	if (svc->inc) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/*
	/*
	 *    Unlink the whole destination list
	 *    Unlink the whole destination list
	 */
	 */
@@ -1428,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
	/*
	/*
	 *    Free the service if nobody refers to it
	 *    Free the service if nobody refers to it
	 */
	 */
	if (atomic_read(&svc->refcnt) == 0) {
	if (atomic_dec_and_test(&svc->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
			      svc->fwmark,
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port), atomic_read(&svc->usecnt));
			      ntohs(svc->port));
		free_percpu(svc->stats.cpustats);
		call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
		kfree(svc);
	}
	}


	/* decrease the module use count */
	/* decrease the module use count */
@@ -1446,21 +1385,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
 */
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
{
	/* Hold svc to avoid double release from dest_trash */
	atomic_inc(&svc->refcnt);
	/*
	/*
	 * Unhash it from the service table
	 * Unhash it from the service table
	 */
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);
	ip_vs_svc_unhash(svc);


	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	__ip_vs_del_service(svc, cleanup);
	__ip_vs_del_service(svc, cleanup);

	write_unlock_bh(&__ip_vs_svc_lock);
}
}


/*
/*
@@ -1482,13 +1414,14 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
static int ip_vs_flush(struct net *net, bool cleanup)
static int ip_vs_flush(struct net *net, bool cleanup)
{
{
	int idx;
	int idx;
	struct ip_vs_service *svc, *nxt;
	struct ip_vs_service *svc;
	struct hlist_node *n;


	/*
	/*
	 * Flush the service table hashed by <netns,protocol,addr,port>
	 * Flush the service table hashed by <netns,protocol,addr,port>
	 */
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
					  s_list) {
					  s_list) {
			if (net_eq(svc->net, net))
			if (net_eq(svc->net, net))
				ip_vs_unlink_service(svc, cleanup);
				ip_vs_unlink_service(svc, cleanup);
@@ -1499,8 +1432,8 @@ static int ip_vs_flush(struct net *net, bool cleanup)
	 * Flush the service table hashed by fwmark
	 * Flush the service table hashed by fwmark
	 */
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt,
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
					 &ip_vs_svc_fwm_table[idx], f_list) {
					  f_list) {
			if (net_eq(svc->net, net))
			if (net_eq(svc->net, net))
				ip_vs_unlink_service(svc, cleanup);
				ip_vs_unlink_service(svc, cleanup);
		}
		}
@@ -1558,7 +1491,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
	EnterFunction(2);
	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	mutex_lock(&__ip_vs_mutex);
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net)) {
			if (net_eq(svc->net, net)) {
				list_for_each_entry(dest, &svc->destinations,
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
						    n_list) {
@@ -1567,7 +1500,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			}
			}
		}
		}


		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net)) {
			if (net_eq(svc->net, net)) {
				list_for_each_entry(dest, &svc->destinations,
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
						    n_list) {
@@ -1595,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
{
{
	struct ip_vs_dest *dest;
	struct ip_vs_dest *dest;


	write_lock_bh(&__ip_vs_svc_lock);
	list_for_each_entry(dest, &svc->destinations, n_list) {
	list_for_each_entry(dest, &svc->destinations, n_list) {
		ip_vs_zero_stats(&dest->stats);
		ip_vs_zero_stats(&dest->stats);
	}
	}
	ip_vs_zero_stats(&svc->stats);
	ip_vs_zero_stats(&svc->stats);
	write_unlock_bh(&__ip_vs_svc_lock);
	return 0;
	return 0;
}
}


@@ -1610,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net)
	struct ip_vs_service *svc;
	struct ip_vs_service *svc;


	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net))
			if (net_eq(svc->net, net))
				ip_vs_zero_service(svc);
				ip_vs_zero_service(svc);
		}
		}
	}
	}


	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net))
			if (net_eq(svc->net, net))
				ip_vs_zero_service(svc);
				ip_vs_zero_service(svc);
		}
		}
@@ -1945,7 +1876,7 @@ static struct ctl_table vs_vars[] = {


struct ip_vs_iter {
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;
	struct hlist_head *table;
	int bucket;
	int bucket;
};
};


@@ -1978,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)


	/* look in hash by protocol */
	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net) && pos-- == 0) {
			if (net_eq(svc->net, net) && pos-- == 0) {
				iter->table = ip_vs_svc_table;
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				iter->bucket = idx;
@@ -1989,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)


	/* keep looking in fwmark */
	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
					 f_list) {
			if (net_eq(svc->net, net) && pos-- == 0) {
			if (net_eq(svc->net, net) && pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				iter->bucket = idx;
@@ -2002,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
}
}


static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(__ip_vs_svc_lock)
{
{


	read_lock_bh(&__ip_vs_svc_lock);
	rcu_read_lock();
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
}




static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
{
	struct list_head *e;
	struct hlist_node *e;
	struct ip_vs_iter *iter;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;
	struct ip_vs_service *svc;


@@ -2025,12 +1956,13 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)


	if (iter->table == ip_vs_svc_table) {
	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
			return list_entry(e, struct ip_vs_service, s_list);
		if (e)

			return hlist_entry(e, struct ip_vs_service, s_list);


		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
			hlist_for_each_entry_rcu(svc,
						 &ip_vs_svc_table[iter->bucket],
						 s_list) {
						 s_list) {
				return svc;
				return svc;
			}
			}
@@ -2042,12 +1974,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	}
	}


	/* next service in hashed by fwmark */
	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
		return list_entry(e, struct ip_vs_service, f_list);
	if (e)
		return hlist_entry(e, struct ip_vs_service, f_list);


 scan_fwmark:
 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
		hlist_for_each_entry_rcu(svc,
					 &ip_vs_svc_fwm_table[iter->bucket],
					 f_list)
					 f_list)
			return svc;
			return svc;
	}
	}
@@ -2056,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
}


static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
{
	read_unlock_bh(&__ip_vs_svc_lock);
	rcu_read_unlock();
}
}




@@ -2076,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
		const struct ip_vs_service *svc = v;
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;
		const struct ip_vs_dest *dest;
		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);


		if (iter->table == ip_vs_svc_table) {
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
#ifdef CONFIG_IP_VS_IPV6
@@ -2084,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
					   ip_vs_proto_name(svc->protocol),
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   &svc->addr.in6,
					   ntohs(svc->port),
					   ntohs(svc->port),
					   svc->scheduler->name);
					   sched->name);
			else
			else
#endif
#endif
				seq_printf(seq, "%s  %08X:%04X %s %s ",
				seq_printf(seq, "%s  %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   ntohs(svc->port),
					   svc->scheduler->name,
					   sched->name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
		} else {
			seq_printf(seq, "FWM  %08X %s %s",
			seq_printf(seq, "FWM  %08X %s %s",
				   svc->fwmark, svc->scheduler->name,
				   svc->fwmark, sched->name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}
		}


@@ -2451,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
	}
	}


	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	rcu_read_lock();
	if (usvc.fwmark == 0)
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
					   &usvc.addr, usvc.port);
					   &usvc.addr, usvc.port);
	else
	else
		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
	rcu_read_unlock();


	if (cmd != IP_VS_SO_SET_ADD
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2507,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
static void
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
{
	struct ip_vs_scheduler *sched;

	sched = rcu_dereference_protected(src->scheduler, 1);
	dst->protocol = src->protocol;
	dst->protocol = src->protocol;
	dst->addr = src->addr.ip;
	dst->addr = src->addr.ip;
	dst->port = src->port;
	dst->port = src->port;
	dst->fwmark = src->fwmark;
	dst->fwmark = src->fwmark;
	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
	strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
	dst->flags = src->flags;
	dst->flags = src->flags;
	dst->timeout = src->timeout / HZ;
	dst->timeout = src->timeout / HZ;
	dst->netmask = src->netmask;
	dst->netmask = src->netmask;
@@ -2530,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net,
	int ret = 0;
	int ret = 0;


	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Only expose IPv4 entries to old interface */
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || !net_eq(svc->net, net))
			if (svc->af != AF_INET || !net_eq(svc->net, net))
				continue;
				continue;
@@ -2549,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net,
	}
	}


	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			/* Only expose IPv4 entries to old interface */
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || !net_eq(svc->net, net))
			if (svc->af != AF_INET || !net_eq(svc->net, net))
				continue;
				continue;
@@ -2578,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
	union nf_inet_addr addr = { .ip = get->addr };
	union nf_inet_addr addr = { .ip = get->addr };
	int ret = 0;
	int ret = 0;


	rcu_read_lock();
	if (get->fwmark)
	if (get->fwmark)
		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
	else
	else
		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
					   get->port);
					   get->port);
	rcu_read_unlock();


	if (svc) {
	if (svc) {
		int count = 0;
		int count = 0;
@@ -2765,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)


		entry = (struct ip_vs_service_entry *)arg;
		entry = (struct ip_vs_service_entry *)arg;
		addr.ip = entry->addr;
		addr.ip = entry->addr;
		rcu_read_lock();
		if (entry->fwmark)
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
		else
		else
			svc = __ip_vs_service_find(net, AF_INET,
			svc = __ip_vs_service_find(net, AF_INET,
						   entry->protocol, &addr,
						   entry->protocol, &addr,
						   entry->port);
						   entry->port);
		rcu_read_unlock();
		if (svc) {
		if (svc) {
			ip_vs_copy_service(entry, svc);
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2927,6 +2870,7 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
static int ip_vs_genl_fill_service(struct sk_buff *skb,
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
				   struct ip_vs_service *svc)
{
{
	struct ip_vs_scheduler *sched;
	struct nlattr *nl_service;
	struct nlattr *nl_service;
	struct ip_vs_flags flags = { .flags = svc->flags,
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };
				     .mask = ~0 };
@@ -2947,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
			goto nla_put_failure;
			goto nla_put_failure;
	}
	}


	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
	sched = rcu_dereference_protected(svc->scheduler, 1);
	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
	    (svc->pe &&
	    (svc->pe &&
	     nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
	     nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
@@ -2998,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,


	mutex_lock(&__ip_vs_mutex);
	mutex_lock(&__ip_vs_mutex);
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
			if (++idx <= start || !net_eq(svc->net, net))
			if (++idx <= start || !net_eq(svc->net, net))
				continue;
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -3009,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
	}
	}


	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
			if (++idx <= start || !net_eq(svc->net, net))
			if (++idx <= start || !net_eq(svc->net, net))
				continue;
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -3069,11 +3014,13 @@ static int ip_vs_genl_parse_service(struct net *net,
		usvc->fwmark = 0;
		usvc->fwmark = 0;
	}
	}


	rcu_read_lock();
	if (usvc->fwmark)
	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
	else
	else
		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
					   &usvc->addr, usvc->port);
	rcu_read_unlock();
	*ret_svc = svc;
	*ret_svc = svc;


	/* If a full entry was requested, check for the additional fields */
	/* If a full entry was requested, check for the additional fields */
@@ -3905,8 +3852,8 @@ int __init ip_vs_control_init(void)


	/* Initialize svc_table, ip_vs_svc_fwm_table */
	/* Initialize svc_table, ip_vs_svc_fwm_table */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
		INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}
	}


	smp_wmb();	/* Do we really need it now ? */
	smp_wmb();	/* Do we really need it now ? */
Loading