Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 01f2f3f6 authored by Hagen Paul Pfeifer's avatar Hagen Paul Pfeifer Committed by David S. Miller
Browse files

net: optimize Berkeley Packet Filter (BPF) processing



Gcc is currenlty not in the ability to optimize the switch statement in
sk_run_filter() because of dense case labels. This patch replace the
OR'd labels with ordered sequenced case labels. The sk_chk_filter()
function is modified to patch/replace the original OPCODES in a
ordered but equivalent form. gcc is now in the ability to transform the
switch statement in sk_run_filter into a jump table of complexity O(1).

Until this patch gcc generates a sequence of conditional branches (O(n) of 567
byte .text segment size (arch x86_64):

7ff: 8b 06                 mov    (%rsi),%eax
801: 66 83 f8 35           cmp    $0x35,%ax
805: 0f 84 d0 02 00 00     je     adb <sk_run_filter+0x31d>
80b: 0f 87 07 01 00 00     ja     918 <sk_run_filter+0x15a>
811: 66 83 f8 15           cmp    $0x15,%ax
815: 0f 84 c5 02 00 00     je     ae0 <sk_run_filter+0x322>
81b: 77 73                 ja     890 <sk_run_filter+0xd2>
81d: 66 83 f8 04           cmp    $0x4,%ax
821: 0f 84 17 02 00 00     je     a3e <sk_run_filter+0x280>
827: 77 29                 ja     852 <sk_run_filter+0x94>
829: 66 83 f8 01           cmp    $0x1,%ax
[...]

With the modification the compiler translate the switch statement into
the following jump table fragment:

7ff: 66 83 3e 2c           cmpw   $0x2c,(%rsi)
803: 0f 87 1f 02 00 00     ja     a28 <sk_run_filter+0x26a>
809: 0f b7 06              movzwl (%rsi),%eax
80c: ff 24 c5 00 00 00 00  jmpq   *0x0(,%rax,8)
813: 44 89 e3              mov    %r12d,%ebx
816: e9 43 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>
81b: 41 89 dc              mov    %ebx,%r12d
81e: e9 3b 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>

Furthermore, I reordered the instructions to reduce cache line misses by
order the most common instruction to the start.

Signed-off-by: default avatarHagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent bd97a63f
Loading
Loading
Loading
Loading
+48 −0
Original line number Diff line number Diff line
@@ -91,6 +91,54 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
#define         BPF_TAX         0x00
#define         BPF_TXA         0x80

enum {
	BPF_S_RET_K = 0,
	BPF_S_RET_A,
	BPF_S_ALU_ADD_K,
	BPF_S_ALU_ADD_X,
	BPF_S_ALU_SUB_K,
	BPF_S_ALU_SUB_X,
	BPF_S_ALU_MUL_K,
	BPF_S_ALU_MUL_X,
	BPF_S_ALU_DIV_X,
	BPF_S_ALU_AND_K,
	BPF_S_ALU_AND_X,
	BPF_S_ALU_OR_K,
	BPF_S_ALU_OR_X,
	BPF_S_ALU_LSH_K,
	BPF_S_ALU_LSH_X,
	BPF_S_ALU_RSH_K,
	BPF_S_ALU_RSH_X,
	BPF_S_ALU_NEG,
	BPF_S_LD_W_ABS,
	BPF_S_LD_H_ABS,
	BPF_S_LD_B_ABS,
	BPF_S_LD_W_LEN,
	BPF_S_LD_W_IND,
	BPF_S_LD_H_IND,
	BPF_S_LD_B_IND,
	BPF_S_LD_IMM,
	BPF_S_LDX_W_LEN,
	BPF_S_LDX_B_MSH,
	BPF_S_LDX_IMM,
	BPF_S_MISC_TAX,
	BPF_S_MISC_TXA,
	BPF_S_ALU_DIV_K,
	BPF_S_LD_MEM,
	BPF_S_LDX_MEM,
	BPF_S_ST,
	BPF_S_STX,
	BPF_S_JMP_JA,
	BPF_S_JMP_JEQ_K,
	BPF_S_JMP_JEQ_X,
	BPF_S_JMP_JGE_K,
	BPF_S_JMP_JGE_X,
	BPF_S_JMP_JGT_K,
	BPF_S_JMP_JGT_X,
	BPF_S_JMP_JSET_K,
	BPF_S_JMP_JSET_X,
};

#ifndef BPF_MAXINSNS
#define BPF_MAXINSNS 4096
#endif
+161 −51
Original line number Diff line number Diff line
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
		fentry = &filter[pc];

		switch (fentry->code) {
		case BPF_ALU|BPF_ADD|BPF_X:
		case BPF_S_ALU_ADD_X:
			A += X;
			continue;
		case BPF_ALU|BPF_ADD|BPF_K:
		case BPF_S_ALU_ADD_K:
			A += fentry->k;
			continue;
		case BPF_ALU|BPF_SUB|BPF_X:
		case BPF_S_ALU_SUB_X:
			A -= X;
			continue;
		case BPF_ALU|BPF_SUB|BPF_K:
		case BPF_S_ALU_SUB_K:
			A -= fentry->k;
			continue;
		case BPF_ALU|BPF_MUL|BPF_X:
		case BPF_S_ALU_MUL_X:
			A *= X;
			continue;
		case BPF_ALU|BPF_MUL|BPF_K:
		case BPF_S_ALU_MUL_K:
			A *= fentry->k;
			continue;
		case BPF_ALU|BPF_DIV|BPF_X:
		case BPF_S_ALU_DIV_X:
			if (X == 0)
				return 0;
			A /= X;
			continue;
		case BPF_ALU|BPF_DIV|BPF_K:
		case BPF_S_ALU_DIV_K:
			A /= fentry->k;
			continue;
		case BPF_ALU|BPF_AND|BPF_X:
		case BPF_S_ALU_AND_X:
			A &= X;
			continue;
		case BPF_ALU|BPF_AND|BPF_K:
		case BPF_S_ALU_AND_K:
			A &= fentry->k;
			continue;
		case BPF_ALU|BPF_OR|BPF_X:
		case BPF_S_ALU_OR_X:
			A |= X;
			continue;
		case BPF_ALU|BPF_OR|BPF_K:
		case BPF_S_ALU_OR_K:
			A |= fentry->k;
			continue;
		case BPF_ALU|BPF_LSH|BPF_X:
		case BPF_S_ALU_LSH_X:
			A <<= X;
			continue;
		case BPF_ALU|BPF_LSH|BPF_K:
		case BPF_S_ALU_LSH_K:
			A <<= fentry->k;
			continue;
		case BPF_ALU|BPF_RSH|BPF_X:
		case BPF_S_ALU_RSH_X:
			A >>= X;
			continue;
		case BPF_ALU|BPF_RSH|BPF_K:
		case BPF_S_ALU_RSH_K:
			A >>= fentry->k;
			continue;
		case BPF_ALU|BPF_NEG:
		case BPF_S_ALU_NEG:
			A = -A;
			continue;
		case BPF_JMP|BPF_JA:
		case BPF_S_JMP_JA:
			pc += fentry->k;
			continue;
		case BPF_JMP|BPF_JGT|BPF_K:
		case BPF_S_JMP_JGT_K:
			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGE|BPF_K:
		case BPF_S_JMP_JGE_K:
			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JEQ|BPF_K:
		case BPF_S_JMP_JEQ_K:
			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JSET|BPF_K:
		case BPF_S_JMP_JSET_K:
			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGT|BPF_X:
		case BPF_S_JMP_JGT_X:
			pc += (A > X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGE|BPF_X:
		case BPF_S_JMP_JGE_X:
			pc += (A >= X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JEQ|BPF_X:
		case BPF_S_JMP_JEQ_X:
			pc += (A == X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JSET|BPF_X:
		case BPF_S_JMP_JSET_X:
			pc += (A & X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_LD|BPF_W|BPF_ABS:
		case BPF_S_LD_W_ABS:
			k = fentry->k;
load_w:
			ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
				continue;
			}
			break;
		case BPF_LD|BPF_H|BPF_ABS:
		case BPF_S_LD_H_ABS:
			k = fentry->k;
load_h:
			ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
				continue;
			}
			break;
		case BPF_LD|BPF_B|BPF_ABS:
		case BPF_S_LD_B_ABS:
			k = fentry->k;
load_b:
			ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
				continue;
			}
			break;
		case BPF_LD|BPF_W|BPF_LEN:
		case BPF_S_LD_W_LEN:
			A = skb->len;
			continue;
		case BPF_LDX|BPF_W|BPF_LEN:
		case BPF_S_LDX_W_LEN:
			X = skb->len;
			continue;
		case BPF_LD|BPF_W|BPF_IND:
		case BPF_S_LD_W_IND:
			k = X + fentry->k;
			goto load_w;
		case BPF_LD|BPF_H|BPF_IND:
		case BPF_S_LD_H_IND:
			k = X + fentry->k;
			goto load_h;
		case BPF_LD|BPF_B|BPF_IND:
		case BPF_S_LD_B_IND:
			k = X + fentry->k;
			goto load_b;
		case BPF_LDX|BPF_B|BPF_MSH:
		case BPF_S_LDX_B_MSH:
			ptr = load_pointer(skb, fentry->k, 1, &tmp);
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
		case BPF_LD|BPF_IMM:
		case BPF_S_LD_IMM:
			A = fentry->k;
			continue;
		case BPF_LDX|BPF_IMM:
		case BPF_S_LDX_IMM:
			X = fentry->k;
			continue;
		case BPF_LD|BPF_MEM:
		case BPF_S_LD_MEM:
			A = mem[fentry->k];
			continue;
		case BPF_LDX|BPF_MEM:
		case BPF_S_LDX_MEM:
			X = mem[fentry->k];
			continue;
		case BPF_MISC|BPF_TAX:
		case BPF_S_MISC_TAX:
			X = A;
			continue;
		case BPF_MISC|BPF_TXA:
		case BPF_S_MISC_TXA:
			A = X;
			continue;
		case BPF_RET|BPF_K:
		case BPF_S_RET_K:
			return fentry->k;
		case BPF_RET|BPF_A:
		case BPF_S_RET_A:
			return A;
		case BPF_ST:
		case BPF_S_ST:
			mem[fentry->k] = A;
			continue;
		case BPF_STX:
		case BPF_S_STX:
			mem[fentry->k] = X;
			continue;
		default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
		/* Only allow valid instructions */
		switch (ftest->code) {
		case BPF_ALU|BPF_ADD|BPF_K:
			ftest->code = BPF_S_ALU_ADD_K;
			break;
		case BPF_ALU|BPF_ADD|BPF_X:
			ftest->code = BPF_S_ALU_ADD_X;
			break;
		case BPF_ALU|BPF_SUB|BPF_K:
			ftest->code = BPF_S_ALU_SUB_K;
			break;
		case BPF_ALU|BPF_SUB|BPF_X:
			ftest->code = BPF_S_ALU_SUB_X;
			break;
		case BPF_ALU|BPF_MUL|BPF_K:
			ftest->code = BPF_S_ALU_MUL_K;
			break;
		case BPF_ALU|BPF_MUL|BPF_X:
			ftest->code = BPF_S_ALU_MUL_X;
			break;
		case BPF_ALU|BPF_DIV|BPF_X:
			ftest->code = BPF_S_ALU_DIV_X;
			break;
		case BPF_ALU|BPF_AND|BPF_K:
			ftest->code = BPF_S_ALU_AND_K;
			break;
		case BPF_ALU|BPF_AND|BPF_X:
			ftest->code = BPF_S_ALU_AND_X;
			break;
		case BPF_ALU|BPF_OR|BPF_K:
			ftest->code = BPF_S_ALU_OR_K;
			break;
		case BPF_ALU|BPF_OR|BPF_X:
			ftest->code = BPF_S_ALU_OR_X;
			break;
		case BPF_ALU|BPF_LSH|BPF_K:
			ftest->code = BPF_S_ALU_LSH_K;
			break;
		case BPF_ALU|BPF_LSH|BPF_X:
			ftest->code = BPF_S_ALU_LSH_X;
			break;
		case BPF_ALU|BPF_RSH|BPF_K:
			ftest->code = BPF_S_ALU_RSH_K;
			break;
		case BPF_ALU|BPF_RSH|BPF_X:
			ftest->code = BPF_S_ALU_RSH_X;
			break;
		case BPF_ALU|BPF_NEG:
			ftest->code = BPF_S_ALU_NEG;
			break;
		case BPF_LD|BPF_W|BPF_ABS:
			ftest->code = BPF_S_LD_W_ABS;
			break;
		case BPF_LD|BPF_H|BPF_ABS:
			ftest->code = BPF_S_LD_H_ABS;
			break;
		case BPF_LD|BPF_B|BPF_ABS:
			ftest->code = BPF_S_LD_B_ABS;
			break;
		case BPF_LD|BPF_W|BPF_LEN:
			ftest->code = BPF_S_LD_W_LEN;
			break;
		case BPF_LD|BPF_W|BPF_IND:
			ftest->code = BPF_S_LD_W_IND;
			break;
		case BPF_LD|BPF_H|BPF_IND:
			ftest->code = BPF_S_LD_H_IND;
			break;
		case BPF_LD|BPF_B|BPF_IND:
			ftest->code = BPF_S_LD_B_IND;
			break;
		case BPF_LD|BPF_IMM:
			ftest->code = BPF_S_LD_IMM;
			break;
		case BPF_LDX|BPF_W|BPF_LEN:
			ftest->code = BPF_S_LDX_W_LEN;
			break;
		case BPF_LDX|BPF_B|BPF_MSH:
			ftest->code = BPF_S_LDX_B_MSH;
			break;
		case BPF_LDX|BPF_IMM:
			ftest->code = BPF_S_LDX_IMM;
			break;
		case BPF_MISC|BPF_TAX:
			ftest->code = BPF_S_MISC_TAX;
			break;
		case BPF_MISC|BPF_TXA:
			ftest->code = BPF_S_MISC_TXA;
			break;
		case BPF_RET|BPF_K:
			ftest->code = BPF_S_RET_K;
			break;
		case BPF_RET|BPF_A:
			ftest->code = BPF_S_RET_A;
			break;

		/* Some instructions need special checks */

		case BPF_ALU|BPF_DIV|BPF_K:
			/* check for division by zero */
		case BPF_ALU|BPF_DIV|BPF_K:
			if (ftest->k == 0)
				return -EINVAL;
			ftest->code = BPF_S_ALU_DIV_K;
			break;

		/* check for invalid memory addresses */
		case BPF_LD|BPF_MEM:
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			ftest->code = BPF_S_LD_MEM;
			break;
		case BPF_LDX|BPF_MEM:
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			ftest->code = BPF_S_LDX_MEM;
			break;
		case BPF_ST:
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			ftest->code = BPF_S_ST;
			break;
		case BPF_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			ftest->code = BPF_S_STX;
			break;

		case BPF_JMP|BPF_JA:
@@ -447,29 +522,64 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
			 */
			if (ftest->k >= (unsigned)(flen-pc-1))
				return -EINVAL;
			ftest->code = BPF_S_JMP_JA;
			break;

		case BPF_JMP|BPF_JEQ|BPF_K:
			ftest->code = BPF_S_JMP_JEQ_K;
			break;
		case BPF_JMP|BPF_JEQ|BPF_X:
			ftest->code = BPF_S_JMP_JEQ_X;
			break;
		case BPF_JMP|BPF_JGE|BPF_K:
			ftest->code = BPF_S_JMP_JGE_K;
			break;
		case BPF_JMP|BPF_JGE|BPF_X:
			ftest->code = BPF_S_JMP_JGE_X;
			break;
		case BPF_JMP|BPF_JGT|BPF_K:
			ftest->code = BPF_S_JMP_JGT_K;
			break;
		case BPF_JMP|BPF_JGT|BPF_X:
			ftest->code = BPF_S_JMP_JGT_X;
			break;
		case BPF_JMP|BPF_JSET|BPF_K:
			ftest->code = BPF_S_JMP_JSET_K;
			break;
		case BPF_JMP|BPF_JSET|BPF_X:
			ftest->code = BPF_S_JMP_JSET_X;
			break;

		default:
			return -EINVAL;
		}

			/* for conditionals both must be safe */
		switch (ftest->code) {
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		}
	}

	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
		return 0;
		break;
		default:
			return -EINVAL;
		}
}

	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);

/**