Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ffe95ecf authored by David S. Miller
Browse files

Merge branch 'net-remove-dst-garbage-collector-logic'



Wei Wang says:

====================
remove dst garbage collector logic

The current mechanism of dst release is a bit complicated. It is because
the users of dst get divided into 2 situations:
  1. Most users take the reference count when using a dst and release the
     reference count when done.
  2. Exceptional users like IPv4/IPv6/decnet/xfrm routing code do not take a
     reference count when referencing a dst, for historical reasons.

Due to those exceptional use cases in 2, a reference count of 0 is not
adequate evidence that no user is using this dst. So users in 1
can't free the dst simply based on the reference count being 0, because users
in 2 might still hold a reference to it.
Instead, a dst garbage list is needed to hold the dst entries that have already
been removed by the users in 2 but are still held by users in 1. A periodic
garbage collector task runs to check all the dst entries in the list to see
whether the users in 1 have released their references to those dst entries.
If so, the dst is ready to be freed.

This logic introduces unnecessary complications in the dst code which makes it
hard to understand and to debug.

In order to get rid of the whole dst garbage collector (gc) and make the dst
code more unified and simplified, we can make the users in 2 also take reference
count on the dst and release it properly when done.
This way, dst can be safely freed once the refcount drops to 0 and no gc
thread is needed anymore.

This patch series' target is to completely get rid of dst gc logic and free
dst based on reference count only.
Patches 1-3 are preparation patches that do some cleanup/improvement of the
existing code to make later work easier.
Patches 4-21 are the real implementation.
In these patches, a temporary flag DST_NOGC is used to help transition
those exceptional users one by one. Once every component is transitioned,
this temporary flag is removed.
By the end of this patch series, all dst are refcounted when being used
and released when done. And dst will be freed when its refcount drops to 0.
No dst gc task is running anymore.

Note: This patch series depends on the decnet fix that was sent right before:
      "decnet: always not take dst->__refcnt when inserting dst into hash table"

v2:
  add curly braces in udp_v4/6_early_demux() in patch 02
  add EXPORT_SYMBOL() for dst_dev_put() in patch 05
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 273889e3 44ebe791
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -563,7 +563,7 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)

static int vrf_rt6_create(struct net_device *dev)
{
	int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM | DST_NOCACHE;
	int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM;
	struct net_vrf *vrf = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct fib6_table *rt6i_table;
@@ -583,8 +583,6 @@ static int vrf_rt6_create(struct net_device *dev)
	if (!rt6)
		goto out;

	dst_hold(&rt6->dst);

	rt6->rt6i_table = rt6i_table;
	rt6->dst.output	= vrf_output6;

@@ -597,8 +595,6 @@ static int vrf_rt6_create(struct net_device *dev)
		goto out;
	}

	dst_hold(&rt6_local->dst);

	rt6_local->rt6i_idev  = in6_dev_get(dev);
	rt6_local->rt6i_flags = RTF_UP | RTF_NONEXTHOP | RTF_LOCAL;
	rt6_local->rt6i_table = rt6i_table;
+10 −33
Original line number Diff line number Diff line
@@ -51,13 +51,11 @@ struct dst_entry {
#define DST_HOST		0x0001
#define DST_NOXFRM		0x0002
#define DST_NOPOLICY		0x0004
#define DST_NOHASH		0x0008
#define DST_NOCACHE		0x0010
#define DST_NOCOUNT		0x0020
#define DST_FAKE_RTABLE		0x0040
#define DST_XFRM_TUNNEL		0x0080
#define DST_XFRM_QUEUE		0x0100
#define DST_METADATA		0x0200
#define DST_NOCOUNT		0x0008
#define DST_FAKE_RTABLE		0x0010
#define DST_XFRM_TUNNEL		0x0020
#define DST_XFRM_QUEUE		0x0040
#define DST_METADATA		0x0080

	short			error;

@@ -253,7 +251,7 @@ static inline void dst_hold(struct dst_entry *dst)
	 * __pad_to_align_refcnt declaration in struct dst_entry
	 */
	BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
	atomic_inc(&dst->__refcnt);
	WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
}

static inline void dst_use(struct dst_entry *dst, unsigned long time)
@@ -278,6 +276,8 @@ static inline struct dst_entry *dst_clone(struct dst_entry *dst)

void dst_release(struct dst_entry *dst);

void dst_release_immediate(struct dst_entry *dst);

static inline void refdst_drop(unsigned long refdst)
{
	if (!(refdst & SKB_DST_NOREF))
@@ -334,10 +334,7 @@ static inline void skb_dst_force(struct sk_buff *skb)
 */
static inline bool dst_hold_safe(struct dst_entry *dst)
{
	if (dst->flags & DST_NOCACHE)
	return atomic_inc_not_zero(&dst->__refcnt);
	dst_hold(dst);
	return true;
}

/**
@@ -423,26 +420,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_ref, int initial_obsolete,
	      unsigned short flags);
void __dst_free(struct dst_entry *dst);
struct dst_entry *dst_destroy(struct dst_entry *dst);

static inline void dst_free(struct dst_entry *dst)
{
	if (dst->obsolete > 0)
		return;
	if (!atomic_read(&dst->__refcnt)) {
		dst = dst_destroy(dst);
		if (!dst)
			return;
	}
	__dst_free(dst);
}

static inline void dst_rcu_free(struct rcu_head *head)
{
	struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
	dst_free(dst);
}
void dst_dev_put(struct dst_entry *dst);

static inline void dst_confirm(struct dst_entry *dst)
{
@@ -505,8 +484,6 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
	return dst;
}

void dst_subsys_init(void);

/* Flags for xfrm_lookup flags argument. */
enum {
	XFRM_LOOKUP_ICMP = 1 << 0,
+1 −1
Original line number Diff line number Diff line
@@ -170,7 +170,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
		rt = (struct rt6_info *)(rt->dst.from);

	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+0 −1
Original line number Diff line number Diff line
@@ -116,7 +116,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int flags);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
int icmp6_dst_gc(void);

void fib6_force_start_gc(struct net *net);

+3 −1
Original line number Diff line number Diff line
@@ -190,7 +190,9 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
	rcu_read_lock();
	err = ip_route_input_noref(skb, dst, src, tos, devin);
	if (!err)
		skb_dst_force(skb);
		skb_dst_force_safe(skb);
	if (!skb_dst(skb))
		err = -EINVAL;
	rcu_read_unlock();

	return err;
Loading