Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7ad724b authored by JP Abgrall, committed by Amit Pundir
Browse files

netfilter: xt_qtaguid: start tracking iface rx/tx at low level



qtaguid tracks the device stats by monitoring when a device goes up and
down, and then reads its dev_stats().
But devices don't report stats correctly (either they don't count headers
symmetrically between rx/tx, or they count internal control messages).

Now qtaguid counts the rx/tx bytes/packets during raw:prerouting and
mangle:postrouting (nat is not available in ipv6).

The results are in
  /proc/net/xt_qtaguid/iface_stat_fmt
which outputs a format line (bash expansion):
  ifname  total_skb_{rx,tx}_{bytes,packets}

Added event counters for pre/post handling.
Added extra ctrl_*() pid/uid debugging.

Change-Id: Id84345d544ad1dd5f63e3842cab229e71d339297
Signed-off-by: JP Abgrall <jpa@google.com>
parent 77fe91c8
Loading
Loading
Loading
Loading
+216 −61
Original line number Diff line number Diff line
@@ -114,8 +114,15 @@ module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
/*
 * The iface_stat_all* will go away once userspace gets used to the new fields
 * that have a format line.
 */
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
static struct proc_dir_entry *iface_stat_fmt_procfile;


/*
 * Ordering of locks:
@@ -128,9 +135,9 @@ static struct proc_dir_entry *iface_stat_all_procfile;
 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
 * is acquired.
 *
 * Call tree with all lock holders as of 2011-09-25:
 * Call tree with all lock holders as of 2012-04-27:
 *
 * iface_stat_all_proc_read()
 * iface_stat_fmt_proc_read()
 *   iface_stat_list_lock
 *     (struct iface_stat)
 *
@@ -781,13 +788,14 @@ static struct iface_stat *get_iface_entry(const char *ifname)
	return iface_entry;
}

static int iface_stat_all_proc_read(char *page, char **num_items_returned,
static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
				    off_t items_to_skip, int char_count,
				    int *eof, void *data)
{
	char *outp = page;
	int item_index = 0;
	int len;
	int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
	struct iface_stat *iface_entry;
	struct rtnl_link_stats64 dev_stats, *stats;
	struct rtnl_link_stats64 no_dev_stats = {0};
@@ -797,14 +805,32 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
		return 0;
	}

	CT_DEBUG("qtaguid:proc iface_stat_all "
	CT_DEBUG("qtaguid:proc iface_stat_fmt "
		 "pid=%u tgid=%u uid=%u "
		 "page=%p *num_items_returned=%p off=%ld "
		 "char_count=%d *eof=%d\n", page, *num_items_returned,
		 "char_count=%d *eof=%d\n",
		 current->pid, current->tgid, current_fsuid(),
		 page, *num_items_returned,
		 items_to_skip, char_count, *eof);

	if (*eof)
		return 0;

	if (fmt == 2 && item_index++ >= items_to_skip) {
		len = snprintf(outp, char_count,
			       "ifname "
			       "total_skb_rx_bytes total_skb_rx_packets "
			       "total_skb_tx_bytes total_skb_tx_packets\n"
			);
		if (len >= char_count) {
			*outp = '\0';
			return outp - page;
		}
		outp += len;
		char_count -= len;
		(*num_items_returned)++;
	}

	/*
	 * This lock will prevent iface_stat_update() from changing active,
	 * and in turn prevent an interface from unregistering itself.
@@ -820,18 +846,37 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
		} else {
			stats = &no_dev_stats;
		}
		len = snprintf(outp, char_count,
		/*
		 * If the meaning of the data changes, then update the fmtX
		 * string.
		 */
		if (fmt == 1) {
			len = snprintf(
				outp, char_count,
				"%s %d "
				"%llu %llu %llu %llu "
				"%llu %llu %llu %llu\n",
				iface_entry->ifname,
				iface_entry->active,
			       iface_entry->totals[IFS_RX].bytes,
			       iface_entry->totals[IFS_RX].packets,
			       iface_entry->totals[IFS_TX].bytes,
			       iface_entry->totals[IFS_TX].packets,
				iface_entry->totals_via_dev[IFS_RX].bytes,
				iface_entry->totals_via_dev[IFS_RX].packets,
				iface_entry->totals_via_dev[IFS_TX].bytes,
				iface_entry->totals_via_dev[IFS_TX].packets,
				stats->rx_bytes, stats->rx_packets,
			       stats->tx_bytes, stats->tx_packets);
				stats->tx_bytes, stats->tx_packets
				);
		} else {
			len = snprintf(
				outp, char_count,
				"%s "
				"%llu %llu %llu %llu\n",
				iface_entry->ifname,
				iface_entry->totals_via_skb[IFS_RX].bytes,
				iface_entry->totals_via_skb[IFS_RX].packets,
				iface_entry->totals_via_skb[IFS_TX].bytes,
				iface_entry->totals_via_skb[IFS_TX].packets
				);
		}
		if (len >= char_count) {
			spin_unlock_bh(&iface_stat_list_lock);
			*outp = '\0';
@@ -865,13 +910,17 @@ static void iface_create_proc_worker(struct work_struct *work)
	new_iface->proc_ptr = proc_entry;

	create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
			read_proc_u64, &new_iface->totals[IFS_TX].bytes);
			       read_proc_u64,
			       &new_iface->totals_via_dev[IFS_TX].bytes);
	create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
			read_proc_u64, &new_iface->totals[IFS_RX].bytes);
			       read_proc_u64,
			       &new_iface->totals_via_dev[IFS_RX].bytes);
	create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
			read_proc_u64, &new_iface->totals[IFS_TX].packets);
			       read_proc_u64,
			       &new_iface->totals_via_dev[IFS_TX].packets);
	create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
			read_proc_u64, &new_iface->totals[IFS_RX].packets);
			       read_proc_u64,
			       &new_iface->totals_via_dev[IFS_RX].packets);
	create_proc_read_entry("active", proc_iface_perms, proc_entry,
			read_proc_bool, &new_iface->active);

@@ -975,11 +1024,13 @@ static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
			     "iface reset its stats unexpectedly\n", __func__,
			     net_dev->name);

		iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
		iface->totals[IFS_TX].packets +=
		iface->totals_via_dev[IFS_TX].bytes +=
			iface->last_known[IFS_TX].bytes;
		iface->totals_via_dev[IFS_TX].packets +=
			iface->last_known[IFS_TX].packets;
		iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
		iface->totals[IFS_RX].packets +=
		iface->totals_via_dev[IFS_RX].bytes +=
			iface->last_known[IFS_RX].bytes;
		iface->totals_via_dev[IFS_RX].packets +=
			iface->last_known[IFS_RX].packets;
		iface->last_known_valid = false;
		IF_DEBUG("qtaguid: %s(%s): iface=%p "
@@ -1147,6 +1198,27 @@ static struct sock_tag *get_sock_stat(const struct sock *sk)
	return sock_tag_entry;
}

/*
 * Return the transport protocol number of skb according to par->family.
 *
 * IPv4: the protocol field of the IP header.
 * IPv6: the result of ipv6_find_hdr(); a negative value means no transport
 *       header was found (a debug message is emitted in that case).
 * Any other family: IPPROTO_RAW.
 */
static int ipx_proto(const struct sk_buff *skb,
		     struct xt_action_param *par)
{
	int hdr_off = 0;
	int proto_nr;

	if (par->family == NFPROTO_IPV4)
		return ip_hdr(skb)->protocol;

	if (par->family != NFPROTO_IPV6)
		return IPPROTO_RAW;

	proto_nr = ipv6_find_hdr(skb, &hdr_off, -1, NULL, NULL);
	if (proto_nr < 0)
		MT_DEBUG("%s(): transport header not found in ipv6"
			 " skb=%p\n", __func__, skb);

	return proto_nr;
}

static void
data_counters_update(struct data_counters *dc, int set,
		     enum ifs_tx_rx direction, int proto, int bytes)
@@ -1207,10 +1279,10 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}
	entry->totals[IFS_TX].bytes += stats->tx_bytes;
	entry->totals[IFS_TX].packets += stats->tx_packets;
	entry->totals[IFS_RX].bytes += stats->rx_bytes;
	entry->totals[IFS_RX].packets += stats->rx_packets;
	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
	/* We don't need the last_known[] anymore */
	entry->last_known_valid = false;
	_iface_stat_set_active(entry, net_dev, false);
@@ -1220,6 +1292,67 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
	spin_unlock_bh(&iface_stat_list_lock);
}

/*
 * Update the skb-based totals of an interface from a single packet.
 *
 * The device is skb->dev when set, otherwise par->in (falling back to
 * par->out). The direction is IFS_RX when par->in is set, IFS_TX otherwise.
 * Do nothing if the entry does not exist (when a device was never
 * configured with an IP address).
 * Called on each skb.
 */
static void iface_stat_update_from_skb(const struct sk_buff *skb,
				       struct xt_action_param *par)
{
	struct iface_stat *entry;
	const struct net_device *el_dev;
	enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
	int bytes = skb->len;

	if (!skb->dev) {
		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
		el_dev = par->in ? : par->out;
	} else {
		const struct net_device *other_dev;
		el_dev = skb->dev;
		other_dev = par->in ? : par->out;
		if (el_dev != other_dev) {
			/*
			 * other_dev is NULL when neither par->in nor par->out
			 * is set, so its name must not be read blindly.
			 */
			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
				 "par->(in/out)=%p %s\n",
				 par->hooknum, el_dev, el_dev->name, other_dev,
				 other_dev ? other_dev->name : "(null)");
		}
	}

	/*
	 * Only el_dev itself needs checking: el_dev->name is an array
	 * embedded in struct net_device, so it can never be NULL.
	 */
	if (unlikely(!el_dev)) {
		pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
		       par->hooknum, __func__);
		BUG();
	} else {
		int proto = ipx_proto(skb, par);
		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
			 par->hooknum, el_dev->name, el_dev->type,
			 par->family, proto);
	}

	/* The list lock protects both the lookup and the counter update. */
	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(el_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
			 __func__, el_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 el_dev->name, entry);

	entry->totals_via_skb[direction].bytes += bytes;
	entry->totals_via_skb[direction].packets++;
	spin_unlock_bh(&iface_stat_list_lock);
}

static void tag_stat_update(struct tag_stat *tag_entry,
			enum ifs_tx_rx direction, int proto, int bytes)
{
@@ -1467,18 +1600,31 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
						    parent_procdir);
	if (!iface_stat_all_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_all proc entry\n");
		       " failed to create stat_old proc entry\n");
		err = -1;
		goto err_zap_entry;
	}
	iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
	iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
	iface_stat_all_procfile->data = (void *)1; /* fmt1 */

	iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
						    proc_iface_perms,
						    parent_procdir);
	if (!iface_stat_fmt_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_all proc entry\n");
		err = -1;
		goto err_zap_all_stats_entry;
	}
	iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
	iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */


	err = register_netdevice_notifier(&iface_netdev_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register dev event handler\n");
		goto err_zap_all_stats_entry;
		goto err_zap_all_stats_entries;
	}
	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
	if (err) {
@@ -1499,6 +1645,8 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
err_unreg_nd:
	unregister_netdevice_notifier(&iface_netdev_notifier_blk);
err_zap_all_stats_entries:
	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
err_zap_all_stats_entry:
	remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
err_zap_entry:
@@ -1550,27 +1698,6 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
	return sk;
}

/*
 * Return the transport protocol number of skb according to par->family.
 *
 * IPv4: the protocol field of the IP header.
 * IPv6: the result of ipv6_find_hdr(); a negative value means no transport
 *       header was found (a debug message is emitted in that case).
 * Any other family: IPPROTO_RAW.
 */
static int ipx_proto(const struct sk_buff *skb,
		     struct xt_action_param *par)
{
	int hdr_off = 0;
	int proto_nr;

	if (par->family == NFPROTO_IPV4)
		return ip_hdr(skb)->protocol;

	if (par->family != NFPROTO_IPV6)
		return IPPROTO_RAW;

	proto_nr = ipv6_find_hdr(skb, &hdr_off, -1, NULL, NULL);
	if (proto_nr < 0)
		MT_DEBUG("%s(): transport header not found in ipv6"
			 " skb=%p\n", __func__, skb);

	return proto_nr;
}

static void account_for_uid(const struct sk_buff *skb,
			    const struct sock *alternate_sk, uid_t uid,
			    struct xt_action_param *par)
@@ -1630,8 +1757,22 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
		goto ret_res;
	}

	sk = skb->sk;
	switch (par->hooknum) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		atomic64_inc(&qtu_events.match_calls_prepost);
		iface_stat_update_from_skb(skb, par);
		/*
		 * We are done in pre/post. The skb will get processed
		 * further later on.
		 */
		res = (info->match ^ info->invert);
		goto ret_res;
		break;
	/* default: Fall through and do UID related work */
	}

	sk = skb->sk;
	if (sk == NULL) {
		/*
		 * A missing sk->sk_socket happens when packets are in-flight
@@ -1806,7 +1947,9 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
	if (*eof)
		return 0;

	CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
		 "page=%p off=%ld char_count=%d *eof=%d\n",
		 current->pid, current->tgid, current_fsuid(),
		 page, items_to_skip, char_count, *eof);

	spin_lock_bh(&sock_tag_list_lock);
@@ -1851,6 +1994,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
			       "delete_cmds=%llu "
			       "iface_events=%llu "
			       "match_calls=%llu "
			       "match_calls_prepost=%llu "
			       "match_found_sk=%llu "
			       "match_found_sk_in_ct=%llu "
			       "match_found_no_sk_in_ct=%llu "
@@ -1862,6 +2006,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
			       atomic64_read(&qtu_events.delete_cmds),
			       atomic64_read(&qtu_events.iface_events),
			       atomic64_read(&qtu_events.match_calls),
			       atomic64_read(&qtu_events.match_calls_prepost),
			       atomic64_read(&qtu_events.match_found_sk),
			       atomic64_read(&qtu_events.match_found_sk_in_ct),
			       atomic64_read(
@@ -2135,7 +2280,9 @@ static int ctrl_cmd_tag(const char *input)
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
			" sock_fd=%d err=%d\n", input, sock_fd, res);
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			current_fsuid());
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
@@ -2280,7 +2427,9 @@ static int ctrl_cmd_untag(const char *input)
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
			" sock_fd=%d err=%d\n", input, sock_fd, res);
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			current_fsuid());
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
@@ -2356,6 +2505,9 @@ static int qtaguid_ctrl_parse(const char *input, int count)
	char cmd;
	int res;

	CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
		 input, current->pid, current->tgid, current_fsuid());

	cmd = input[0];
	/* Collect params for commands */
	switch (cmd) {
@@ -2532,8 +2684,11 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
		return len;
	}

	CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
		"char_count=%d *eof=%d\n", page, *num_items_returned,
	CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
		 "page=%p *num_items_returned=%p off=%ld "
		 "char_count=%d *eof=%d\n",
		 current->pid, current->tgid, current_fsuid(),
		 page, *num_items_returned,
		 items_to_skip, char_count, *eof);

	if (*eof)
+4 −1
Original line number Diff line number Diff line
@@ -202,7 +202,8 @@ struct iface_stat {
	/* net_dev is only valid for active iface_stat */
	struct net_device *net_dev;

	struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
	struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS];
	/*
	 * We keep the last_known, because some devices reset their counters
	 * just before NETDEV_UP, while some will reset just before
@@ -254,6 +255,8 @@ struct qtaguid_event_counts {
	atomic64_t iface_events;  /* Number of NETDEV_* events handled */

	atomic64_t match_calls;   /* Number of times iptables called mt */
	/* Number of times iptables called mt from pre or post routing hooks */
	atomic64_t match_calls_prepost;
	/*
	 * match_found_sk_*: numbers related to the netfilter matching
	 * function finding a sock for the sk_buff.
+13 −5
Original line number Diff line number Diff line
@@ -183,7 +183,11 @@ char *pp_iface_stat(struct iface_stat *is)
		res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
				"list=list_head{...}, "
				"ifname=%s, "
				"total={rx={bytes=%llu, "
				"total_dev={rx={bytes=%llu, "
				"packets=%llu}, "
				"tx={bytes=%llu, "
				"packets=%llu}}, "
				"total_skb={rx={bytes=%llu, "
				"packets=%llu}, "
				"tx={bytes=%llu, "
				"packets=%llu}}, "
@@ -198,10 +202,14 @@ char *pp_iface_stat(struct iface_stat *is)
				"tag_stat_tree=rb_root{...}}",
				is,
				is->ifname,
				is->totals[IFS_RX].bytes,
				is->totals[IFS_RX].packets,
				is->totals[IFS_TX].bytes,
				is->totals[IFS_TX].packets,
				is->totals_via_dev[IFS_RX].bytes,
				is->totals_via_dev[IFS_RX].packets,
				is->totals_via_dev[IFS_TX].bytes,
				is->totals_via_dev[IFS_TX].packets,
				is->totals_via_skb[IFS_RX].bytes,
				is->totals_via_skb[IFS_RX].packets,
				is->totals_via_skb[IFS_TX].bytes,
				is->totals_via_skb[IFS_TX].packets,
				is->last_known_valid,
				is->last_known[IFS_RX].bytes,
				is->last_known[IFS_RX].packets,