Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e110861f authored by Lorenzo Colitti, committed by David S. Miller
Browse files

net: add a sysctl to reflect the fwmark on replies



Kernel-originated IP packets that have no user socket associated
with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.)
are emitted with a mark of zero. Add a sysctl to make them have
the same mark as the packet they are replying to.

This allows an administrator that wishes to do so to use
mark-based routing, firewalling, etc. for these replies by
marking the original packets inbound.

Tested using user-mode linux:
 - ICMP/ICMPv6 echo replies and errors.
 - TCP RST packets (IPv4 and IPv6).

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 87e067cd
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -231,6 +231,9 @@ void ipfrag_init(void);

void ip_static_sysctl_init(void);

/*
 * Mark to put on a kernel-originated IPv4 reply.
 *
 * When the namespace's sysctl_fwmark_reflect flag is clear the reply is
 * unmarked (0); otherwise it carries @mark, the mark of the packet being
 * answered.  @net is a pointer to the namespace; each argument is
 * evaluated at most once.
 */
#define IP4_REPLY_MARK(net, mark) \
	((net)->ipv4.sysctl_fwmark_reflect == 0 ? 0 : (mark))

static inline bool ip_is_fragment(const struct iphdr *iph)
{
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
+3 −0
Original line number Diff line number Diff line
@@ -113,6 +113,9 @@ struct frag_hdr {
/*
 * NOTE(review): presumably bit masks for the 16-bit offset/flags field of
 * struct frag_hdr (IP6_MF = "more fragments" flag, IP6_OFFSET = fragment
 * offset bits) -- confirm against the struct definition, which is not
 * visible in this hunk.
 */
#define	IP6_MF		0x0001
#define	IP6_OFFSET	0xFFF8

/*
 * Mark to put on a kernel-originated IPv6 reply.
 *
 * When the namespace's fwmark_reflect sysctl is clear the reply is
 * unmarked (0); otherwise it carries @mark, the mark of the packet being
 * answered.  @net is a pointer to the namespace; each argument is
 * evaluated at most once.
 */
#define IP6_REPLY_MARK(net, mark) \
	((net)->ipv6.sysctl.fwmark_reflect == 0 ? 0 : (mark))

#include <net/sock.h>

/* sysctls */
+2 −0
Original line number Diff line number Diff line
@@ -77,6 +77,8 @@ struct netns_ipv4 {
	int sysctl_ip_no_pmtu_disc;
	int sysctl_ip_fwd_use_pmtu;

	int sysctl_fwmark_reflect;

	struct ping_group_range ping_group_range;

	atomic_t dev_addr_genid;
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
	int flowlabel_consistency;
	int icmpv6_time;
	int anycast_src_echo_reply;
	int fwmark_reflect;
};

struct netns_ipv6 {
+9 −2
Original line number Diff line number Diff line
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	struct sock *sk;
	struct inet_sock *inet;
	__be32 daddr, saddr;
	u32 mark = IP4_REPLY_MARK(net, skb->mark);

	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
		return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	icmp_param->data.icmph.checksum = 0;

	inet->tos = ip_hdr(skb)->tos;
	sk->sk_mark = mark;
	daddr = ipc.addr = ip_hdr(skb)->saddr;
	saddr = fib_compute_spec_dst(skb);
	ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	fl4.flowi4_mark = mark;
	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
	fl4.flowi4_proto = IPPROTO_ICMP;
	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
					struct flowi4 *fl4,
					struct sk_buff *skb_in,
					const struct iphdr *iph,
					__be32 saddr, u8 tos,
					__be32 saddr, u8 tos, u32 mark,
					int type, int code,
					struct icmp_bxm *param)
{
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
	fl4->daddr = (param->replyopts.opt.opt.srr ?
		      param->replyopts.opt.opt.faddr : iph->saddr);
	fl4->saddr = saddr;
	fl4->flowi4_mark = mark;
	fl4->flowi4_tos = RT_TOS(tos);
	fl4->flowi4_proto = IPPROTO_ICMP;
	fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
	struct flowi4 fl4;
	__be32 saddr;
	u8  tos;
	u32 mark;
	struct net *net;
	struct sock *sk;

@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
					   IPTOS_PREC_INTERNETCONTROL) :
					  iph->tos;
	mark = IP4_REPLY_MARK(net, skb_in->mark);

	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
		goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
	icmp_param->skb	  = skb_in;
	icmp_param->offset = skb_network_offset(skb_in);
	inet_sk(sk)->tos = tos;
	sk->sk_mark = mark;
	ipc.addr = iph->saddr;
	ipc.opt = &icmp_param->replyopts.opt;
	ipc.tx_flags = 0;
	ipc.ttl = 0;
	ipc.tos = -1;

	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
			       type, code, icmp_param);
	if (IS_ERR(rt))
		goto out_unlock;
Loading