Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0181ce31 authored by Don Hiatt's avatar Don Hiatt Committed by Doug Ledford
Browse files

IB/hfi1: Add receive fault injection feature



Add fault injection capability:
  - Drop packets unconditionally (fault_by_packet)
  - Drop packets based on opcode (fault_by_opcode)

This feature reacts to the global FAULT_INJECTION
config flag.

The following fault-injection trace events have been added:
  - misc/fault_opcode
  - misc/fault_packet

See 'Documentation/fault-injection/fault-injection.txt'
for details.

Examples:
  - Dropping packets by opcode:
    /sys/kernel/debug/hfi1/hfi1_X/fault_opcode
	# Enable fault
	echo Y > fault_by_opcode
	# Set probability of dropping (0-100%)
	echo 25 > probability
	# Set opcode
	echo 0x64 > opcode
	# Number of times to fault
	echo 3 > times
	# An optional mask allows you to fault
	# a range of opcodes
	echo 0xf0 > mask
    /sys/kernel/debug/hfi1/hfi1_X/fault_opcode/fault_stats
    contains, in parentheses, the number of rx and tx
    faults injected for each opcode.

  - Dropping packets unconditionally
    /sys/kernel/debug/hfi1/hfi1_X/fault_packet
	# Enable fault
	echo Y > fault_by_packet
    /sys/kernel/debug/hfi1/hfi1_X/fault_packet/fault_stats
    contains the number of packets dropped.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent f7b42633
Loading
Loading
Loading
Loading
+222 −0
Original line number Diff line number Diff line
@@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ratelimit.h>
#include <linux/fault-inject.h>

#include "hfi.h"
#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@@ -1063,6 +1067,217 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);

#ifdef CONFIG_FAULT_INJECTION
/*
 * seq_file start callback for fault_stats.
 *
 * The iterator cookie is the position pointer itself.  Iteration is valid
 * while *pos indexes an element of the stats[] array of
 * struct hfi1_opcode_stats_perctx; otherwise NULL ends the sequence.
 */
static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
{
	/* Only named inside ARRAY_SIZE(); never dereferenced at run time. */
	struct hfi1_opcode_stats_perctx *opstats;

	return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
}

/*
 * seq_file next callback for fault_stats.
 *
 * Advances *pos by one and returns the position pointer as the new cookie,
 * or NULL once *pos has moved past the last element of the stats[] array
 * of struct hfi1_opcode_stats_perctx.
 */
static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	/* Only named inside ARRAY_SIZE(); never dereferenced at run time. */
	struct hfi1_opcode_stats_perctx *opstats;

	*pos += 1;
	return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
}

/*
 * seq_file stop callback for fault_stats.  The start/next callbacks take
 * no locks and allocate nothing, so there is nothing to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}

static int _fault_stats_seq_show(struct seq_file *s, void *v)
{
	loff_t *spos = v;
	loff_t i = *spos, j;
	u64 n_packets = 0, n_bytes = 0;
	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
	struct hfi1_devdata *dd = dd_from_dev(ibd);

	for (j = 0; j < dd->first_user_ctxt; j++) {
		if (!dd->rcd[j])
			continue;
		n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
		n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
	}
	if (!n_packets && !n_bytes)
		return SEQ_SKIP;
	if (!ibd->fault_opcode->n_rxfaults[i] &&
	    !ibd->fault_opcode->n_txfaults[i])
		return SEQ_SKIP;
	seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
		   (unsigned long long)n_packets,
		   (unsigned long long)n_bytes,
		   (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
		   (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
	return 0;
}

/*
 * Instantiate the debugfs/seq_file glue for "fault_stats" using the same
 * DEBUGFS_* helper macros this file applies to sdma_cpu_list.  The macros
 * are defined outside this view; presumably they bind the
 * _fault_stats_seq_{start,next,stop,show} callbacks above -- confirm
 * against the macro definitions.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);

/*
 * Tear down the "fault_opcode" debugfs directory and free its state.
 *
 * Safe to call when the opcode fault state was never allocated or has
 * already been released: hfi1_dbg_ibdev_exit() calls the exit path
 * unconditionally, and fault_init_debugfs() may already have torn this
 * state down after a partial init failure, so ibd->fault_opcode can be
 * NULL here.
 */
static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
{
	if (!ibd->fault_opcode)
		return;

	debugfs_remove_recursive(ibd->fault_opcode->dir);
	kfree(ibd->fault_opcode);
	/* Clear the pointer so the fault hooks and a second exit see "off". */
	ibd->fault_opcode = NULL;
}

/*
 * Allocate the per-device opcode fault state and expose it under
 * <ibdev debugfs dir>/fault_opcode.
 *
 * Besides the generic fault_attr knobs created by
 * fault_create_debugfs_attr(), the directory gets "fault_stats",
 * "fault_by_opcode", "opcode" and "mask".  Faulting starts disabled with
 * opcode 0 and mask 0xff.
 *
 * On any failure ibd->fault_opcode is left NULL so that
 * hfi1_dbg_fault_opcode() and the exit path never touch freed memory.
 *
 * Returns 0 on success, -ENOMEM if the state or one of the debugfs files
 * could not be allocated, -ENOENT if the directory could not be created.
 */
static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
{
	struct dentry *parent = ibd->hfi1_ibdev_dbg;

	ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
	if (!ibd->fault_opcode)
		return -ENOMEM;

	ibd->fault_opcode->attr.interval = 1;
	ibd->fault_opcode->attr.require_end = ULONG_MAX;
	ibd->fault_opcode->attr.stacktrace_depth = 32;
	ibd->fault_opcode->attr.dname = NULL;
	ibd->fault_opcode->attr.verbose = 0;
	ibd->fault_opcode->fault_by_opcode = false;
	ibd->fault_opcode->opcode = 0;
	ibd->fault_opcode->mask = 0xff;

	ibd->fault_opcode->dir =
		fault_create_debugfs_attr("fault_opcode",
					  parent,
					  &ibd->fault_opcode->attr);
	if (IS_ERR(ibd->fault_opcode->dir)) {
		kfree(ibd->fault_opcode);
		/* Do not leave a dangling pointer for later users. */
		ibd->fault_opcode = NULL;
		return -ENOENT;
	}

	DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
	if (!debugfs_create_bool("fault_by_opcode", 0600,
				 ibd->fault_opcode->dir,
				 &ibd->fault_opcode->fault_by_opcode))
		goto fail;
	if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
			       &ibd->fault_opcode->opcode))
		goto fail;
	if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
			       &ibd->fault_opcode->mask))
		goto fail;

	return 0;
fail:
	/* Removes the directory, frees the state and NULLs the pointer. */
	fault_exit_opcode_debugfs(ibd);
	return -ENOMEM;
}

/*
 * Tear down the "fault_packet" debugfs directory and free its state.
 *
 * Safe to call when the packet fault state was never allocated or has
 * already been released: hfi1_dbg_ibdev_exit() calls the exit path
 * unconditionally even if fault_init_debugfs() failed, so
 * ibd->fault_packet can be NULL here.
 */
static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
{
	if (!ibd->fault_packet)
		return;

	debugfs_remove_recursive(ibd->fault_packet->dir);
	kfree(ibd->fault_packet);
	/* Clear the pointer so the fault hooks and a second exit see "off". */
	ibd->fault_packet = NULL;
}

/*
 * Allocate the per-device packet fault state and expose it under
 * <ibdev debugfs dir>/fault_packet.
 *
 * Besides the generic fault_attr knobs created by
 * fault_create_debugfs_attr(), the directory gets "fault_by_packet"
 * (enable switch, initially off) and a read-only "fault_stats" counter
 * of injected faults.
 *
 * On any failure ibd->fault_packet is left NULL so that
 * hfi1_dbg_fault_packet() and the exit path never touch freed memory.
 *
 * Returns 0 on success, -ENOMEM if the state or one of the debugfs files
 * could not be allocated, -ENOENT if the directory could not be created.
 */
static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
{
	struct dentry *parent = ibd->hfi1_ibdev_dbg;

	ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
	if (!ibd->fault_packet)
		return -ENOMEM;

	ibd->fault_packet->attr.interval = 1;
	ibd->fault_packet->attr.require_end = ULONG_MAX;
	ibd->fault_packet->attr.stacktrace_depth = 32;
	ibd->fault_packet->attr.dname = NULL;
	ibd->fault_packet->attr.verbose = 0;
	ibd->fault_packet->fault_by_packet = false;

	/*
	 * The debugfs knobs must drive the packet attr, i.e. the one that
	 * hfi1_dbg_fault_packet() hands to should_fail(), not the opcode
	 * attr.
	 */
	ibd->fault_packet->dir =
		fault_create_debugfs_attr("fault_packet",
					  parent,
					  &ibd->fault_packet->attr);
	if (IS_ERR(ibd->fault_packet->dir)) {
		kfree(ibd->fault_packet);
		/* Do not leave a dangling pointer for later users. */
		ibd->fault_packet = NULL;
		return -ENOENT;
	}

	if (!debugfs_create_bool("fault_by_packet", 0600,
				 ibd->fault_packet->dir,
				 &ibd->fault_packet->fault_by_packet))
		goto fail;
	if (!debugfs_create_u64("fault_stats", 0400,
				ibd->fault_packet->dir,
				&ibd->fault_packet->n_faults))
		goto fail;

	return 0;
fail:
	/* Removes the directory, frees the state and NULLs the pointer. */
	fault_exit_packet_debugfs(ibd);
	return -ENOMEM;
}

/*
 * Release all fault-injection debugfs state for @ibd: first the opcode
 * fault directory/state, then the packet fault directory/state.
 */
static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
{
	fault_exit_opcode_debugfs(ibd);
	fault_exit_packet_debugfs(ibd);
}

/*
 * Set up both fault-injection debugfs directories for @ibd.
 *
 * The opcode directory is created first; if the packet directory then
 * fails, the opcode directory is torn down again so nothing is left
 * half-initialised.
 *
 * Returns 0 on success or the error code of the step that failed.
 */
static int fault_init_debugfs(struct hfi1_ibdev *ibd)
{
	int err;

	err = fault_init_opcode_debugfs(ibd);
	if (err)
		return err;

	err = fault_init_packet_debugfs(ibd);
	if (!err)
		return 0;

	/* Roll back the opcode setup done above. */
	fault_exit_opcode_debugfs(ibd);
	return err;
}

/*
 * Decide whether a packet with @opcode on @qp should be dropped by the
 * opcode fault injector.
 *
 * @qp:     queue pair the packet belongs to (used to find the device and
 *          for the trace event)
 * @opcode: packet opcode; compared as (opcode & mask) against the
 *          configured opcode
 * @rx:     true to account the fault as a receive fault, false for
 *          transmit
 *
 * Returns false when opcode faulting is not set up or not enabled, when
 * the masked opcode does not match, or when should_fail() declines;
 * returns true (after tracing and bumping the per-opcode counter) when
 * the fault should be injected.
 */
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{
	bool ret = false;
	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
	u32 idx;

	if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
		return false;
	if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
		return false;
	ret = should_fail(&ibd->fault_opcode->attr, 1);
	if (ret) {
		trace_hfi1_fault_opcode(qp, opcode);
		/*
		 * @opcode is a u32 but the counters have one slot per 8-bit
		 * opcode.  Fold the index into range so an oversized value
		 * can never write past the arrays; this matches the u8
		 * truncation applied by the match above and the trace event.
		 */
		idx = opcode % ARRAY_SIZE(ibd->fault_opcode->n_rxfaults);
		if (rx)
			ibd->fault_opcode->n_rxfaults[idx]++;
		else
			ibd->fault_opcode->n_txfaults[idx]++;
	}
	return ret;
}

bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
	struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
	struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
	bool ret = false;

	if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
		return false;

	ret = should_fail(&ibd->fault_packet->attr, 1);
	if (ret) {
		++ibd->fault_packet->n_faults;
		trace_hfi1_fault_packet(packet);
	}
	return ret;
}
#endif

void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
	char name[sizeof("port0counters") + 1];
@@ -1112,12 +1327,19 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
					    !port_cntr_ops[i].ops.write ?
					    S_IRUGO : S_IRUGO | S_IWUSR);
		}

#ifdef CONFIG_FAULT_INJECTION
	fault_init_debugfs(ibd);
#endif
}

void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
	if (!hfi1_dbg_root)
		goto out;
#ifdef CONFIG_FAULT_INJECTION
	fault_exit_debugfs(ibd);
#endif
	debugfs_remove(ibd->hfi1_ibdev_link);
	debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
+48 −3
Original line number Diff line number Diff line
@@ -53,23 +53,68 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);

#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>
/* Per-device state for dropping packets by opcode (debugfs "fault_opcode"). */
struct fault_opcode {
	/* generic fault-injection attributes, handed to should_fail() */
	struct fault_attr attr;
	/* debugfs directory returned by fault_create_debugfs_attr() */
	struct dentry *dir;
	/* enable switch; hfi1_dbg_fault_opcode() never faults while false */
	bool fault_by_opcode;
	/* injected fault counts, indexed by opcode (rx == true path) */
	u64 n_rxfaults[256];
	/* injected fault counts, indexed by opcode (rx == false path) */
	u64 n_txfaults[256];
	/* value that (packet opcode & mask) must equal to be eligible */
	u8 opcode;
	/* mask applied to the packet opcode before the comparison */
	u8 mask;
};

/* Per-device state for dropping received packets (debugfs "fault_packet"). */
struct fault_packet {
	/* generic fault-injection attributes, handed to should_fail() */
	struct fault_attr attr;
	/* debugfs directory returned by fault_create_debugfs_attr() */
	struct dentry *dir;
	/* enable switch; hfi1_dbg_fault_packet() never faults while false */
	bool fault_by_packet;
	/* number of faults injected; exposed read-only as "fault_stats" */
	u64 n_faults;
};

/*
 * Fault-injection hooks (CONFIG_FAULT_INJECTION builds).  Each returns
 * true when the caller should drop the packet.
 */
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
#else
/* Stub for builds without CONFIG_FAULT_INJECTION: never inject a fault. */
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
	return false;
}

/* Stub for builds without CONFIG_FAULT_INJECTION: never inject a fault. */
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
					 u32 opcode, bool rx)
{
	return false;
}
#endif

#else
/*
 * No-op stub for the #else configuration (the matching #if is outside
 * this view; presumably CONFIG_DEBUG_FS -- confirm).
 */
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}

void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
/*
 * No-op stub for the #else configuration (the matching #if is outside
 * this view; presumably CONFIG_DEBUG_FS -- confirm).
 */
static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
}

/*
 * No-op stub for the #else configuration (the matching #if is outside
 * this view; presumably CONFIG_DEBUG_FS -- confirm).
 */
static inline void hfi1_dbg_init(void)
{
}

void hfi1_dbg_init(void)
/*
 * No-op stub for the #else configuration (the matching #if is outside
 * this view; presumably CONFIG_DEBUG_FS -- confirm).
 */
static inline void hfi1_dbg_exit(void)
{
}

void hfi1_dbg_exit(void)
/*
 * Stub for the #else configuration (the matching #if is outside this
 * view; presumably CONFIG_DEBUG_FS -- confirm): never inject a fault.
 */
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
	return false;
}

/*
 * Stub for the #else configuration (the matching #if is outside this
 * view; presumably CONFIG_DEBUG_FS -- confirm): never inject a fault.
 */
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
					 u32 opcode, bool rx)
{
	return false;
}
#endif

#endif                          /* _HFI1_DEBUGFS_H */
+8 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
#include "debugfs.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet)
 */
int process_receive_ib(struct hfi1_packet *packet)
{
	if (unlikely(hfi1_dbg_fault_packet(packet)))
		return RHF_RCV_CONTINUE;

	trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
			  packet->rcd->ctxt,
			  rhf_err_flags(packet->rhf),
@@ -1409,6 +1413,8 @@ int process_receive_error(struct hfi1_packet *packet)

int kdeth_process_expected(struct hfi1_packet *packet)
{
	if (unlikely(hfi1_dbg_fault_packet(packet)))
		return RHF_RCV_CONTINUE;
	if (unlikely(rhf_err_flags(packet->rhf)))
		handle_eflags(packet);

@@ -1421,6 +1427,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
	if (unlikely(rhf_err_flags(packet->rhf)))
		handle_eflags(packet);
	if (unlikely(hfi1_dbg_fault_packet(packet)))
		return RHF_RCV_CONTINUE;

	dd_dev_err(packet->rcd->dd,
		   "Unhandled eager packet received. Dropping.\n");
+48 −0
Original line number Diff line number Diff line
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
		      __entry->src)
);

#ifdef CONFIG_FAULT_INJECTION
/*
 * hfi1_fault_opcode: emitted by hfi1_dbg_fault_opcode() each time a fault
 * is injected for an opcode.  Records the device string set up by
 * DD_DEV_ENTRY/DD_DEV_ASSIGN, the QP number and the (8-bit) opcode.
 *
 * NOTE(review): DD_DEV_ASSIGN() is not followed by a ';' here, unlike in
 * hfi1_fault_packet -- confirm the macro expansion makes that harmless.
 */
TRACE_EVENT(hfi1_fault_opcode,
	    TP_PROTO(struct rvt_qp *qp, u8 opcode),
	    TP_ARGS(qp, opcode),
	    TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
			     __field(u32, qpn)
			     __field(u8, opcode)
			     ),
	    TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
			   __entry->qpn = qp->ibqp.qp_num;
			   __entry->opcode = opcode;
			   ),
	    TP_printk("[%s] qpn 0x%x opcode 0x%x",
		      __get_str(dev), __entry->qpn, __entry->opcode)
);

/*
 * hfi1_fault_packet: emitted by hfi1_dbg_fault_packet() each time a
 * received packet is dropped by the packet fault injector.  Records the
 * device string, the receive context number, the RHF error flags, the
 * packet's hlen/tlen/updegr fields and the eager index taken from the RHF.
 */
TRACE_EVENT(hfi1_fault_packet,
	    TP_PROTO(struct hfi1_packet *packet),
	    TP_ARGS(packet),
	    TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
			     __field(u64, eflags)
			     __field(u32, ctxt)
			     __field(u32, hlen)
			     __field(u32, tlen)
			     __field(u32, updegr)
			     __field(u32, etail)
			     ),
	     TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
			    __entry->eflags = rhf_err_flags(packet->rhf);
			    __entry->ctxt = packet->rcd->ctxt;
			    __entry->hlen = packet->hlen;
			    __entry->tlen = packet->tlen;
			    __entry->updegr = packet->updegr;
			    __entry->etail = rhf_egr_index(packet->rhf);
			    ),
	     TP_printk(
		"[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
		__get_str(dev),
		__entry->ctxt,
		__entry->eflags,
		__entry->hlen,
		__entry->tlen,
		__entry->updegr,
		__entry->etail
		)
);
#endif

#endif /* __HFI1_TRACE_MISC_H */

#undef TRACE_INCLUDE_PATH
+6 −0
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "debugfs.h"

static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -599,6 +600,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
			rcu_read_unlock();
			goto drop;
		}
		if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
						   true))) {
			rcu_read_unlock();
			goto drop;
		}
		spin_lock_irqsave(&packet->qp->r_lock, flags);
		packet_handler = qp_ok(opcode, packet);
		if (likely(packet_handler))
Loading