Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f0a40400 authored by David S. Miller
Browse files

Merge branch 'ovs-L3-encap'



Jiri Benc says:

====================
openvswitch: support for layer 3 encapsulated packets

At the core of this patch set is removing the assumption in the Open vSwitch
datapath that all packets have an Ethernet header.

The implementation relies on the presence of pop_eth and push_eth actions
in datapath flows to facilitate adding and removing Ethernet headers as
appropriate. The construction of such flows is left up to user-space.

This series is based on work by Simon Horman, Lorand Jakab, Thomas Morin and
others. I kept Lorand's and Simon's s-o-b in the patches that are derived
from v11 to record their authorship of parts of the code.

Changes from v12 to v13:

* Addressed Pravin's feedback.
* Removed the GRE vport conversion patch; L3 GRE ports should be created by
  rtnetlink instead.

Main changes from v11 to v12:

* The patches were restructured and split differently for easier review.
* They were rebased and adjusted to the current net-next. Especially MPLS
  handling is different (and easier) thanks to the recent MPLS GSO rework.
* Several bugs were discovered and fixed. The most notable is fragment
  handling: header adjustment for ARPHRD_NONE devices on tx needs to be done
  after refragmentation, not before it. This required significant changes in
  the patchset. Another one is stricter checking of attributes (match on L2
  vs. L3 packet) at the kernel level.
* Instead of is_layer3 bool, a mac_proto field is used.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents c540594f 217ac77a
Loading
Loading
Loading
Loading
+15 −0
Original line number Original line Diff line number Diff line
@@ -705,6 +705,15 @@ enum ovs_nat_attr {


#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)


/**
 * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
 * @addresses: Source and destination MAC addresses.
 *
 * The Ethernet type of the pushed header is not carried in this structure;
 * the datapath takes it from the packet's skb->protocol.
 */
struct ovs_action_push_eth {
	struct ovs_key_ethernet addresses;
};

/**
/**
 * enum ovs_action_attr - Action types.
 * enum ovs_action_attr - Action types.
 *
 *
@@ -738,6 +747,10 @@ enum ovs_nat_attr {
 * is no MPLS label stack, as determined by ethertype, no action is taken.
 * is no MPLS label stack, as determined by ethertype, no action is taken.
 * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
 * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
 * entries in the flow key.
 * entries in the flow key.
 * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the
 * packet.
 * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
 * packet.
 *
 *
 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -765,6 +778,8 @@ enum ovs_action_attr {
				       * bits. */
				       * bits. */
	OVS_ACTION_ATTR_CT,           /* Nested OVS_CT_ATTR_* . */
	OVS_ACTION_ATTR_CT,           /* Nested OVS_CT_ATTR_* . */
	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */


	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
				       * from userspace. */
				       * from userspace. */
+81 −30
Original line number Original line Diff line number Diff line
@@ -66,6 +66,7 @@ struct ovs_frag_data {
	u16 vlan_tci;
	u16 vlan_tci;
	__be16 vlan_proto;
	__be16 vlan_proto;
	unsigned int l2_len;
	unsigned int l2_len;
	u8 mac_proto;
	u8 l2_data[MAX_L2_LEN];
	u8 l2_data[MAX_L2_LEN];
};
};


@@ -137,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,


static void invalidate_flow_key(struct sw_flow_key *key)
static void invalidate_flow_key(struct sw_flow_key *key)
{
{
	key->eth.type = htons(0);
	key->mac_proto |= SW_FLOW_KEY_INVALID;
}
}


static bool is_flow_key_valid(const struct sw_flow_key *key)
static bool is_flow_key_valid(const struct sw_flow_key *key)
{
{
	return !!key->eth.type;
	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
}


static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
@@ -186,6 +187,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,


	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);


	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
		update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
		update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
	skb->protocol = mpls->mpls_ethertype;
	skb->protocol = mpls->mpls_ethertype;


@@ -196,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
		    const __be16 ethertype)
{
{
	struct ethhdr *hdr;
	int err;
	int err;


	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -212,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
	skb_reset_mac_header(skb);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb->mac_len);
	skb_set_network_header(skb, skb->mac_len);


	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
		struct ethhdr *hdr;

		/* mpls_hdr() is used to locate the ethertype field correctly in the
		/* mpls_hdr() is used to locate the ethertype field correctly in the
		 * presence of VLAN tags.
		 * presence of VLAN tags.
		 */
		 */
		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
		update_ethertype(skb, hdr, ethertype);
		update_ethertype(skb, hdr, ethertype);
	}
	if (eth_p_mpls(skb->protocol))
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;
		skb->protocol = ethertype;


@@ -312,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
	return 0;
	return 0;
}
}


/* Remove the outermost Ethernet header from @skb and record in @key that
 * the packet no longer has a MAC header (MAC_PROTO_NONE). The key is left
 * marked invalid.
 *
 * pop_eth does not support VLAN packets as this action is never called
 * for them.
 *
 * Always returns 0.
 */
static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
{
	/* Pull ETH_HLEN bytes off the front of the packet, then reset the
	 * MAC header offset and MAC length to match the new start of data.
	 */
	skb_pull_rcsum(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	/* The plain assignment overwrites the SW_FLOW_KEY_INVALID bit in
	 * mac_proto; that is safe only because invalidate_flow_key() sets
	 * the bit again immediately afterwards.
	 */
	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

/* Prepend a new outermost Ethernet header to @skb.
 *
 * @skb:  packet to modify.
 * @key:  flow key; its mac_proto becomes MAC_PROTO_ETHERNET and the key is
 *        left marked invalid.
 * @ethh: action argument supplying the source and destination addresses.
 *
 * The new header's h_proto is copied from skb->protocol.
 *
 * Returns 0 on success, or -ENOMEM if skb_cow_head() fails.
 */
static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
		    const struct ovs_action_push_eth *ethh)
{
	struct ethhdr *hdr;

	/* Add the new Ethernet header */
	if (skb_cow_head(skb, ETH_HLEN) < 0)
		return -ENOMEM;

	/* Extend the data area by ETH_HLEN at the front and make the MAC
	 * header offset and length refer to the newly exposed bytes.
	 */
	skb_push(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	hdr = eth_hdr(skb);
	ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
	ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
	hdr->h_proto = skb->protocol;

	/* Called only once the header bytes are fully written, over exactly
	 * those ETH_HLEN bytes.
	 */
	skb_postpush_rcsum(skb, hdr, ETH_HLEN);

	/* The plain assignment overwrites the SW_FLOW_KEY_INVALID bit in
	 * mac_proto; that is safe only because invalidate_flow_key() sets
	 * the bit again immediately afterwards.
	 */
	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_ETHERNET;
	invalidate_flow_key(key);
	return 0;
}

static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
				  __be32 addr, __be32 new_addr)
				  __be32 addr, __be32 new_addr)
{
{
@@ -673,7 +719,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
		skb_reset_mac_len(skb);
		skb_reset_mac_len(skb);
	}
	}


	ovs_vport_send(vport, skb);
	ovs_vport_send(vport, skb, data->mac_proto);
	return 0;
	return 0;
}
}


@@ -692,7 +738,7 @@ static struct dst_ops ovs_dst_ops = {
 * ovs_vport_output(), which is called once per fragmented packet.
 * ovs_vport_output(), which is called once per fragmented packet.
 */
 */
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
			 u16 orig_network_offset)
			 u16 orig_network_offset, u8 mac_proto)
{
{
	unsigned int hlen = skb_network_offset(skb);
	unsigned int hlen = skb_network_offset(skb);
	struct ovs_frag_data *data;
	struct ovs_frag_data *data;
@@ -705,6 +751,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
	data->network_offset = orig_network_offset;
	data->network_offset = orig_network_offset;
	data->vlan_tci = skb->vlan_tci;
	data->vlan_tci = skb->vlan_tci;
	data->vlan_proto = skb->vlan_proto;
	data->vlan_proto = skb->vlan_proto;
	data->mac_proto = mac_proto;
	data->l2_len = hlen;
	data->l2_len = hlen;
	memcpy(&data->l2_data, skb->data, hlen);
	memcpy(&data->l2_data, skb->data, hlen);


@@ -713,7 +760,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
}
}


static void ovs_fragment(struct net *net, struct vport *vport,
static void ovs_fragment(struct net *net, struct vport *vport,
			 struct sk_buff *skb, u16 mru, __be16 ethertype)
			 struct sk_buff *skb, u16 mru,
			 struct sw_flow_key *key)
{
{
	u16 orig_network_offset = 0;
	u16 orig_network_offset = 0;


@@ -727,11 +775,12 @@ static void ovs_fragment(struct net *net, struct vport *vport,
		goto err;
		goto err;
	}
	}


	if (ethertype == htons(ETH_P_IP)) {
	if (key->eth.type == htons(ETH_P_IP)) {
		struct dst_entry ovs_dst;
		struct dst_entry ovs_dst;
		unsigned long orig_dst;
		unsigned long orig_dst;


		prepare_frag(vport, skb, orig_network_offset);
		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
		dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_dst.dev = vport->dev;
		ovs_dst.dev = vport->dev;
@@ -742,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,


		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
		refdst_drop(orig_dst);
	} else if (ethertype == htons(ETH_P_IPV6)) {
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
		unsigned long orig_dst;
		unsigned long orig_dst;
		struct rt6_info ovs_rt;
		struct rt6_info ovs_rt;
@@ -751,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
			goto err;
			goto err;
		}
		}


		prepare_frag(vport, skb, orig_network_offset);
		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		memset(&ovs_rt, 0, sizeof(ovs_rt));
		memset(&ovs_rt, 0, sizeof(ovs_rt));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -765,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
		refdst_drop(orig_dst);
		refdst_drop(orig_dst);
	} else {
	} else {
		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
			  ovs_vport_name(vport), ntohs(ethertype), mru,
			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
			  vport->dev->mtu);
			  vport->dev->mtu);
		goto err;
		goto err;
	}
	}
@@ -785,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
		u32 cutlen = OVS_CB(skb)->cutlen;
		u32 cutlen = OVS_CB(skb)->cutlen;


		if (unlikely(cutlen > 0)) {
		if (unlikely(cutlen > 0)) {
			if (skb->len - cutlen > ETH_HLEN)
			if (skb->len - cutlen > ovs_mac_header_len(key))
				pskb_trim(skb, skb->len - cutlen);
				pskb_trim(skb, skb->len - cutlen);
			else
			else
				pskb_trim(skb, ETH_HLEN);
				pskb_trim(skb, ovs_mac_header_len(key));
		}
		}


		if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
		if (likely(!mru ||
			ovs_vport_send(vport, skb);
		           (skb->len <= mru + vport->dev->hard_header_len))) {
			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
		} else if (mru <= vport->dev->mtu) {
		} else if (mru <= vport->dev->mtu) {
			struct net *net = read_pnet(&dp->net);
			struct net *net = read_pnet(&dp->net);
			__be16 ethertype = key->eth.type;


			if (!is_flow_key_valid(key)) {
			ovs_fragment(net, vport, skb, mru, key);
				if (eth_p_mpls(skb->protocol))
					ethertype = skb->inner_protocol;
				else
					ethertype = vlan_get_protocol(skb);
			}

			ovs_fragment(net, vport, skb, mru, ethertype);
		} else {
		} else {
			kfree_skb(skb);
			kfree_skb(skb);
		}
		}
@@ -1198,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			if (err)
			if (err)
				return err == -EINPROGRESS ? 0 : err;
				return err == -EINPROGRESS ? 0 : err;
			break;
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			err = push_eth(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			err = pop_eth(skb, key);
			break;
		}
		}


		if (unlikely(err)) {
		if (unlikely(err)) {
+1 −12
Original line number Original line Diff line number Diff line
@@ -560,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
	struct sw_flow *flow;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct datapath *dp;
	struct ethhdr *eth;
	struct vport *input_vport;
	struct vport *input_vport;
	u16 mru = 0;
	u16 mru = 0;
	int len;
	int len;
@@ -581,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)


	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);


	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (eth_proto_is_802_3(eth->h_proto))
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	/* Set packet's mru */
	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -618,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
	rcu_assign_pointer(flow->sf_acts, acts);
	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;
	packet->mark = flow->key.phy.skb_mark;
	packet->protocol = flow->key.eth.type;


	rcu_read_lock();
	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
+81 −24
Original line number Original line Diff line number Diff line
@@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
	return 1;
	return 1;
}
}


static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static void clear_vlan(struct sw_flow_key *key)
{
{
	int res;

	key->eth.vlan.tci = 0;
	key->eth.vlan.tci = 0;
	key->eth.vlan.tpid = 0;
	key->eth.vlan.tpid = 0;
	key->eth.cvlan.tci = 0;
	key->eth.cvlan.tci = 0;
	key->eth.cvlan.tpid = 0;
	key->eth.cvlan.tpid = 0;
}

static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int res;


	if (skb_vlan_tag_present(skb)) {
	if (skb_vlan_tag_present(skb)) {
		key->eth.vlan.tci = htons(skb->vlan_tci);
		key->eth.vlan.tci = htons(skb->vlan_tci);
@@ -483,17 +486,20 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 *
 *
 * Returns 0 if successful, otherwise a negative errno value.
 * Returns 0 if successful, otherwise a negative errno value.
 *
 *
 * Initializes @skb header pointers as follows:
 * Initializes @skb header fields as follows:
 *
 *
 *    - skb->mac_header: the Ethernet header.
 *    - skb->mac_header: the L2 header.
 *
 *
 *    - skb->network_header: just past the Ethernet header, or just past the
 *    - skb->network_header: just past the L2 header, or just past the
 *      VLAN header, to the first byte of the Ethernet payload.
 *      VLAN header, to the first byte of the L2 payload.
 *
 *
 *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 *      on output, then just past the IP header, if one is present and
 *      on output, then just past the IP header, if one is present and
 *      of a correct length, otherwise the same as skb->network_header.
 *      of a correct length, otherwise the same as skb->network_header.
 *      For other key->eth.type values it is left untouched.
 *      For other key->eth.type values it is left untouched.
 *
 *    - skb->protocol: the type of the data starting at skb->network_header.
 *      Equals to key->eth.type.
 */
 */
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
{
@@ -505,9 +511,14 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)


	skb_reset_mac_header(skb);
	skb_reset_mac_header(skb);


	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
	/* Link layer. */
	 * header in the linear data area.
	clear_vlan(key);
	 */
	if (key->mac_proto == MAC_PROTO_NONE) {
		if (unlikely(eth_type_vlan(skb->protocol)))
			return -EINVAL;

		skb_reset_network_header(skb);
	} else {
		eth = eth_hdr(skb);
		eth = eth_hdr(skb);
		ether_addr_copy(key->eth.src, eth->h_source);
		ether_addr_copy(key->eth.src, eth->h_source);
		ether_addr_copy(key->eth.dst, eth->h_dest);
		ether_addr_copy(key->eth.dst, eth->h_dest);
@@ -520,13 +531,15 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
		if (unlikely(parse_vlan(skb, key)))
		if (unlikely(parse_vlan(skb, key)))
			return -ENOMEM;
			return -ENOMEM;


	key->eth.type = parse_ethertype(skb);
		skb->protocol = parse_ethertype(skb);
	if (unlikely(key->eth.type == htons(0)))
		if (unlikely(skb->protocol == htons(0)))
			return -ENOMEM;
			return -ENOMEM;


		skb_reset_network_header(skb);
		skb_reset_network_header(skb);
	skb_reset_mac_len(skb);
		__skb_push(skb, skb->data - skb_mac_header(skb));
		__skb_push(skb, skb->data - skb_mac_header(skb));
	}
	skb_reset_mac_len(skb);
	key->eth.type = skb->protocol;


	/* Network layer. */
	/* Network layer. */
	if (key->eth.type == htons(ETH_P_IP)) {
	if (key->eth.type == htons(ETH_P_IP)) {
@@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
	return key_extract(skb, key);
	return key_extract(skb, key);
}
}


/* Derive the MAC protocol of @skb from the type of its receiving device.
 *
 * ARPHRD_ETHER devices always yield MAC_PROTO_ETHERNET. ARPHRD_NONE devices
 * yield MAC_PROTO_ETHERNET only when skb->protocol is ETH_P_TEB, otherwise
 * MAC_PROTO_NONE. Any other device type triggers a one-time warning and
 * returns -EINVAL.
 */
static int key_extract_mac_proto(struct sk_buff *skb)
{
	if (skb->dev->type == ARPHRD_ETHER)
		return MAC_PROTO_ETHERNET;

	if (skb->dev->type == ARPHRD_NONE)
		return skb->protocol == htons(ETH_P_TEB) ?
		       MAC_PROTO_ETHERNET : MAC_PROTO_NONE;

	/* Unsupported device type. */
	WARN_ON_ONCE(1);
	return -EINVAL;
}

int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
			 struct sk_buff *skb, struct sw_flow_key *key)
			 struct sk_buff *skb, struct sw_flow_key *key)
{
{
	int res;

	/* Extract metadata from packet. */
	/* Extract metadata from packet. */
	if (tun_info) {
	if (tun_info) {
		key->tun_proto = ip_tunnel_info_af(tun_info);
		key->tun_proto = ip_tunnel_info_af(tun_info);
@@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
	key->phy.skb_mark = skb->mark;
	key->phy.skb_mark = skb->mark;
	ovs_ct_fill_key(skb, key);
	ovs_ct_fill_key(skb, key);
	key->ovs_flow_hash = 0;
	key->ovs_flow_hash = 0;
	res = key_extract_mac_proto(skb);
	if (res < 0)
		return res;
	key->mac_proto = res;
	key->recirc_id = 0;
	key->recirc_id = 0;


	return key_extract(skb, key);
	return key_extract(skb, key);
@@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
	if (err)
	if (err)
		return err;
		return err;


	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
		/* key_extract assumes that skb->protocol is set-up for
		 * layer 3 packets which is the case for other callers,
		 * in particular packets received from the network stack.
		 * Here the correct value can be set from the metadata
		 * extracted above.
		 */
		skb->protocol = key->eth.type;
	} else {
		struct ethhdr *eth;

		skb_reset_mac_header(skb);
		eth = eth_hdr(skb);

		/* Normally, setting the skb 'protocol' field would be
		 * handled by a call to eth_type_trans(), but it assumes
		 * there's a sending device, which we may not have.
		 */
		if (eth_proto_is_802_3(eth->h_proto))
			skb->protocol = eth->h_proto;
		else
			skb->protocol = htons(ETH_P_802_2);
	}

	return key_extract(skb, key);
	return key_extract(skb, key);
}
}
+22 −0
Original line number Original line Diff line number Diff line
@@ -37,6 +37,12 @@


struct sk_buff;
struct sk_buff;


/* MAC layer protocol of a packet, stored in sw_flow_key.mac_proto. */
enum sw_flow_mac_proto {
	MAC_PROTO_NONE = 0,	/* No MAC header; MAC header length is 0. */
	MAC_PROTO_ETHERNET,	/* Ethernet header of ETH_HLEN bytes. */
};
/* Flag bit OR-ed into sw_flow_key.mac_proto to mark the key invalid;
 * ovs_key_mac_proto() masks it off before the value is compared against
 * enum sw_flow_mac_proto.
 */
#define SW_FLOW_KEY_INVALID	0x80

/* Store options at the end of the array if they are less than the
/* Store options at the end of the array if they are less than the
 * maximum size. This allows us to get the benefits of variable length
 * maximum size. This allows us to get the benefits of variable length
 * matching for small options.
 * matching for small options.
@@ -68,6 +74,7 @@ struct sw_flow_key {
		u32	skb_mark;	/* SKB mark. */
		u32	skb_mark;	/* SKB mark. */
		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
	} __packed phy; /* Safe when right after 'tun_key'. */
	} __packed phy; /* Safe when right after 'tun_key'. */
	u8 mac_proto;			/* MAC layer protocol (e.g. Ethernet). */
	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
	u32 recirc_id;			/* Recirculation ID.  */
	u32 recirc_id;			/* Recirculation ID.  */
@@ -206,6 +213,21 @@ struct arp_eth_header {
	unsigned char       ar_tip[4];		/* target IP address        */
	unsigned char       ar_tip[4];		/* target IP address        */
} __packed;
} __packed;


/* Return the MAC protocol stored in @key with the SW_FLOW_KEY_INVALID
 * flag bit stripped.
 */
static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
{
	u8 proto = key->mac_proto;

	proto &= ~SW_FLOW_KEY_INVALID;
	return proto;
}

/* Length in bytes of the MAC header implied by @mac_proto: ETH_HLEN for
 * MAC_PROTO_ETHERNET, 0 for anything else.
 */
static inline u16 __ovs_mac_header_len(u8 mac_proto)
{
	if (mac_proto == MAC_PROTO_ETHERNET)
		return ETH_HLEN;

	return 0;
}

static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
{
	return __ovs_mac_header_len(ovs_key_mac_proto(key));
}

static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
{
{
	return sfid->ufid_len;
	return sfid->ufid_len;
Loading