Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b33cc2ce authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-ipv6-Add-support-for-path-selection-using-hash-of-5-tuple'



David Ahern says:

====================
net/ipv6: Add support for path selection using hash of 5-tuple

Hardware supports multipath selection using the standard L4 5-tuple
instead of just L3 and the flow label. In addition, some network
operators prefer IPv6 path selection to use the 5-tuple. To that end,
add support to IPv6 for multipath hash policy similar to
bf4e0a3d ("net: ipv4: add support for ECMP hash policy choice").
The default is still L3 which covers source and destination addresses
along with flow label and IPv6 protocol. This gives users a choice in
hash algorithms if they believe L3 only and the IPv6 flow label are not
sufficient for their use case.

A separate sysctl is added for IPv6, allowing IPv4 and IPv6 to use
different algorithms if desired.

The first 3 patches modify the IPv4 variant so that at the end of the
patch set the ipv4 and ipv6 implementations are direct parallels.

Patch 4 refactors the existing rt6_multipath_hash in preparation for
adding the policy option.

Patch 5 renames the existing netevent to have IPv4 in the name so ipv4
changes can be distinguished from IPv6 if the netevent handler cares.

Patch 6 adds the skb as an argument through the FIB lookup functions
to the multipath selection. Needed for the forwarding case.

Patch 7 adds the L4 hash support.

Patch 8 adds the hook for the netevent to the spectrum driver to update
the ASIC.

Patch 9 removes no longer used code.

Patch 10 adds a testcase for IPv6 multipath with L4 hash.

v3
- comments from Ido:
  - removed fib_info arg in patch 1; left by mistake on rebase to net-next
  - removed __get_hash_from_flowi4 declaration
  - line wrap change to spectrum_router.c to maintain 80 chars

v2
- rebased to top of tree
- added refactor of fib_multipath_hash following recent change
- plumb skb through lookup functions to multipath selection
- fix sysctl setting; was missing the data set in ipv6_sysctl_net_init
- added test case

RFC to v1:
- rebase to top of net-next
- fix addr_type in hash_keys and removed flow label as noticed by Ido
- added a comment to cover letter about choice in algorithms based on
  use case per Or's comments
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e871cae7 91a5c1ec
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
	FALSE: disabled
	Default: FALSE

fib_multipath_hash_policy - INTEGER
	Controls which hash policy to use for multipath routes.
	Default: 0 (Layer 3)
	Possible values:
	0 - Layer 3 (source and destination addresses plus flow label)
	1 - Layer 4 (standard 5-tuple)

anycast_src_echo_reply - BOOLEAN
	Controls the use of anycast addresses as source addresses for ICMPv6
	echo reply
+1 −1
Original line number Diff line number Diff line
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
			   IPV6_ADDR_LINKLOCAL;
	struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
					 &src_addr->sin6_addr, net_dev->ifindex,
					 strict);
					 NULL, strict);
	bool ret;

	if (!rt)
+15 −2
Original line number Diff line number Diff line
@@ -2430,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
		mlxsw_core_schedule_work(&net_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_MULTIPATH_HASH_UPDATE:
	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
		net = ptr;

		if (!net_eq(net, &init_net))
@@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)

static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
	if (only_l3) {
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	} else {
		mlxsw_sp_mp_hash_header_set(recr2_pl,
					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
	}
}

static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
+2 −1
Original line number Diff line number Diff line
@@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
		};

		skb_dst_drop(skb);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
					     skb, flags);
		skb_dst_set(skb, dst);
		break;
	}
+4 −3
Original line number Diff line number Diff line
@@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
					     const struct net_device *dev,
					     struct flowi6 *fl6,
					     int ifindex,
					     const struct sk_buff *skb,
					     int flags)
{
	struct net_vrf *vrf = netdev_priv(dev);
@@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
	if (!table)
		return NULL;

	return ip6_pol_route(net, table, ifindex, fl6, flags);
	return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
}

static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
	struct net *net = dev_net(vrf_dev);
	struct rt6_info *rt6;

	rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
	rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
				   RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
	if (unlikely(!rt6))
		return;
@@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
	if (rt)
		dst = &rt->dst;

Loading