Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5b2941b1 authored by David S. Miller's avatar David S. Miller
Browse files


Jesse Gross says:

====================
A number of significant new features and optimizations for net-next/3.12.
Highlights are:
 * "Megaflows", an optimization that allows userspace to specify which
   flow fields were used to compute the results of the flow lookup.
   This allows for a major reduction in flow setups (the major
   performance bottleneck in Open vSwitch) without reducing flexibility.
 * Converting netlink dump operations to use RCU, allowing for
   additional parallelism in userspace.
 * Matching and modifying SCTP protocol fields.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b6750b40 5828cd9a
Loading
Loading
Loading
Loading
+40 −0
Original line number Original line Diff line number Diff line
@@ -91,6 +91,46 @@ Often we ellipsize arguments not important to the discussion, e.g.:
    in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
    in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)




Wildcarded flow key format
--------------------------

A wildcarded flow is described with two sequences of Netlink attributes
passed over the Netlink socket: a flow key, exactly as described above, and an
optional corresponding flow mask.

A wildcarded flow can represent a group of exact match flows. Each '1' bit
in the mask specifies an exact match with the corresponding bit in the flow key.
A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
of an incoming packet. Using wildcarded flows can improve the flow setup rate
by reducing the number of new flows that need to be processed by the user space
program.

Support for the mask Netlink attribute is optional for both the kernel and user
space program. The kernel can ignore the mask attribute, installing an exact
match flow, or reduce the number of don't care bits in the kernel to less than
what was specified by the user space program. In this case, variations in bits
that the kernel does not implement will simply result in additional flow setups.
The kernel module will also work with user space programs that neither support
nor supply flow mask attributes.

Since the kernel may ignore or modify wildcard bits, it can be difficult for
the userspace program to know exactly what matches are installed. There are
two possible approaches: reactively install flows as they miss the kernel
flow table (and therefore not attempt to determine wildcard changes at all)
or use the kernel's response messages to determine the installed wildcards.

When interacting with userspace, the kernel should maintain the match portion
of the key exactly as originally installed. This provides a handle to
identify the flow for all future operations. However, when reporting the
mask of an installed flow, the mask should include any restrictions imposed
by the kernel.

The behavior when using overlapping wildcarded flows is undefined. It is the
responsibility of the user space program to ensure that any incoming packet
can match at most one flow, wildcarded or not. The current implementation
performs best-effort detection of overlapping wildcarded flows and may reject
some but not all of them. However, this behavior may change in future versions.


Basic rule for evolving flow keys
Basic rule for evolving flow keys
---------------------------------
---------------------------------


+1 −0
Original line number Original line Diff line number Diff line
@@ -41,6 +41,7 @@
#define NEXTHDR_ICMP		58	/* ICMP for IPv6. */
#define NEXTHDR_ICMP		58	/* ICMP for IPv6. */
#define NEXTHDR_NONE		59	/* No next header */
#define NEXTHDR_NONE		59	/* No next header */
#define NEXTHDR_DEST		60	/* Destination options header. */
#define NEXTHDR_DEST		60	/* Destination options header. */
#define NEXTHDR_SCTP		132	/* SCTP message. */
#define NEXTHDR_MOBILITY	135	/* Mobility header. */
#define NEXTHDR_MOBILITY	135	/* Mobility header. */


#define NEXTHDR_MAX		255
#define NEXTHDR_MAX		255
+14 −1
Original line number Original line Diff line number Diff line


/*
/*
 * Copyright (c) 2007-2011 Nicira Networks.
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 *
 * This program is free software; you can redistribute it and/or
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * modify it under the terms of version 2 of the GNU General Public
@@ -259,6 +259,7 @@ enum ovs_key_attr {
	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
	OVS_KEY_ATTR_TUNNEL,    /* Nested set of ovs_tunnel attributes */
	OVS_KEY_ATTR_TUNNEL,    /* Nested set of ovs_tunnel attributes */
	OVS_KEY_ATTR_SCTP,      /* struct ovs_key_sctp */


#ifdef __KERNEL__
#ifdef __KERNEL__
	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
@@ -333,6 +334,11 @@ struct ovs_key_udp {
	__be16 udp_dst;
	__be16 udp_dst;
};
};


/* Payload of the OVS_KEY_ATTR_SCTP flow key attribute. */
struct ovs_key_sctp {
	__be16 sctp_src;	/* NOTE(review): presumably the SCTP source port - confirm */
	__be16 sctp_dst;	/* NOTE(review): presumably the SCTP destination port - confirm */
};

struct ovs_key_icmp {
struct ovs_key_icmp {
	__u8 icmp_type;
	__u8 icmp_type;
	__u8 icmp_code;
	__u8 icmp_code;
@@ -379,6 +385,12 @@ struct ovs_key_nd {
 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
 * last-used time, accumulated TCP flags, and statistics for this flow.
 * last-used time, accumulated TCP flags, and statistics for this flow.
 * Otherwise ignored in requests.  Never present in notifications.
 * Otherwise ignored in requests.  Never present in notifications.
 * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the
 * mask bits for wildcarded flow match. Mask bit value '1' specifies exact
 * match with corresponding flow key bit, while mask bit value '0' specifies
 * a wildcarded match. Omitting a nested attribute from the mask is treated
 * as wildcarding all corresponding fields. Optional for all requests. If the
 * mask attribute itself is not present, all flow key bits are exact match
 * bits.
 *
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_FLOW_* commands.
 * payload for %OVS_FLOW_* commands.
@@ -391,6 +403,7 @@ enum ovs_flow_attr {
	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
	OVS_FLOW_ATTR_MASK,      /* Sequence of OVS_KEY_ATTR_* attributes. */
	__OVS_FLOW_ATTR_MAX
	__OVS_FLOW_ATTR_MAX
};
};


+1 −0
Original line number Original line Diff line number Diff line
@@ -4,6 +4,7 @@


config OPENVSWITCH
config OPENVSWITCH
	tristate "Open vSwitch"
	tristate "Open vSwitch"
	select LIBCRC32C
	---help---
	---help---
	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
	  environments.  In addition to supporting a variety of features
	  environments.  In addition to supporting a variety of features
+4 −1
Original line number Original line Diff line number Diff line
@@ -10,10 +10,13 @@ openvswitch-y := \
	dp_notify.o \
	dp_notify.o \
	flow.o \
	flow.o \
	vport.o \
	vport.o \
	vport-gre.o \
	vport-internal_dev.o \
	vport-internal_dev.o \
	vport-netdev.o
	vport-netdev.o


ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
openvswitch-y += vport-vxlan.o
openvswitch-y += vport-vxlan.o
endif
endif

ifneq ($(CONFIG_OPENVSWITCH_GRE),)
openvswitch-y += vport-gre.o
endif
Loading