Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7c3c867f authored by Ingo Molnar
Browse files

Merge tag 'please-pull-aer-trace' of...

Merge tag 'please-pull-aer-trace' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

 into perf/core

Use perf/event tracing to report PCI Express advanced errors, by
Tony Luck.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 2a133759 2cced2d9
Loading
Loading
Loading
Loading
+16 −3
Original line number Original line Diff line number Diff line
@@ -29,6 +29,7 @@
#include <linux/time.h>
#include <linux/time.h>
#include <linux/cper.h>
#include <linux/cper.h>
#include <linux/acpi.h>
#include <linux/acpi.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/aer.h>


/*
/*
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
			    const struct acpi_hest_generic_data *gdata)
			    const struct acpi_hest_generic_data *gdata)
{
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct pci_dev *dev;
#endif

	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
#ifdef CONFIG_ACPI_APEI_PCIEAER
#ifdef CONFIG_ACPI_APEI_PCIEAER
	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
			pcie->device_id.bus, pcie->device_id.function);
		cper_print_aer(pfx, gdata->error_severity, aer_regs);
	if (!dev) {
		pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
			pcie->device_id.segment, pcie->device_id.bus,
			pcie->device_id.slot, pcie->device_id.function);
		return;
	}
	}
	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
		cper_print_aer(pfx, dev, gdata->error_severity,
				(struct aer_capability_regs *) pcie->aer_info);
	pci_dev_put(dev);
#endif
#endif
}
}


+34 −29
Original line number Original line Diff line number Diff line
@@ -23,6 +23,9 @@


#include "aerdrv.h"
#include "aerdrv.h"


#define CREATE_TRACE_POINTS
#include <trace/events/ras.h>

#define AER_AGENT_RECEIVER		0
#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_COMPLETER		2
@@ -121,12 +124,11 @@ static const char *aer_agent_string[] = {
	"Transmitter ID"
	"Transmitter ID"
};
};


static void __aer_print_error(const char *prefix,
static void __aer_print_error(struct pci_dev *dev,
			      struct aer_err_info *info)
			      struct aer_err_info *info)
{
{
	int i, status;
	int i, status;
	const char *errmsg = NULL;
	const char *errmsg = NULL;

	status = (info->status & ~info->mask);
	status = (info->status & ~info->mask);


	for (i = 0; i < 32; i++) {
	for (i = 0; i < 32; i++) {
@@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix,
				aer_uncorrectable_error_string[i] : NULL;
				aer_uncorrectable_error_string[i] : NULL;


		if (errmsg)
		if (errmsg)
			printk("%s""   [%2d] %-22s%s\n", prefix, i, errmsg,
			dev_err(&dev->dev, "   [%2d] %-22s%s\n", i, errmsg,
				info->first_error == i ? " (First)" : "");
				info->first_error == i ? " (First)" : "");
		else
		else
			printk("%s""   [%2d] Unknown Error Bit%s\n", prefix, i,
			dev_err(&dev->dev, "   [%2d] Unknown Error Bit%s\n",
				info->first_error == i ? " (First)" : "");
				i, info->first_error == i ? " (First)" : "");
	}
	}
}
}


void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
{
	int id = ((dev->bus->number << 8) | dev->devfn);
	int id = ((dev->bus->number << 8) | dev->devfn);
	char prefix[44];

	snprintf(prefix, sizeof(prefix), "%s%s %s: ",
		 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
		 dev_driver_string(&dev->dev), dev_name(&dev->dev));


	if (info->status == 0) {
	if (info->status == 0) {
		printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
		dev_err(&dev->dev,
			"id=%04x(Unregistered Agent ID)\n", prefix,
			"PCIe Bus Error: severity=%s, type=Unaccessible, "
			"id=%04x(Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity], id);
			aer_error_severity_string[info->severity], id);
	} else {
	} else {
		int layer, agent;
		int layer, agent;
@@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
		layer = AER_GET_LAYER_ERROR(info->severity, info->status);
		layer = AER_GET_LAYER_ERROR(info->severity, info->status);
		agent = AER_GET_AGENT(info->severity, info->status);
		agent = AER_GET_AGENT(info->severity, info->status);


		printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
		dev_err(&dev->dev,
			prefix, aer_error_severity_string[info->severity],
			"PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
			aer_error_severity_string[info->severity],
			aer_error_layer[layer], id, aer_agent_string[agent]);
			aer_error_layer[layer], id, aer_agent_string[agent]);


		printk("%s""  device [%04x:%04x] error status/mask=%08x/%08x\n",
		dev_err(&dev->dev,
			prefix, dev->vendor, dev->device,
			"  device [%04x:%04x] error status/mask=%08x/%08x\n",
			dev->vendor, dev->device,
			info->status, info->mask);
			info->status, info->mask);


		__aer_print_error(prefix, info);
		__aer_print_error(dev, info);


		if (info->tlp_header_valid) {
		if (info->tlp_header_valid) {
			unsigned char *tlp = (unsigned char *) &info->tlp;
			unsigned char *tlp = (unsigned char *) &info->tlp;
			printk("%s""  TLP Header:"
			dev_err(&dev->dev, "  TLP Header:"
				" %02x%02x%02x%02x %02x%02x%02x%02x"
				" %02x%02x%02x%02x %02x%02x%02x%02x"
				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
				prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
				*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
				*(tlp + 11), *(tlp + 10), *(tlp + 9),
				*(tlp + 11), *(tlp + 10), *(tlp + 9),
				*(tlp + 8), *(tlp + 15), *(tlp + 14),
				*(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
	}
	}


	if (info->id && info->error_dev_num > 1 && info->id == id)
	if (info->id && info->error_dev_num > 1 && info->id == id)
		printk("%s""  Error of this Agent(%04x) is reported first\n",
		dev_err(&dev->dev,
			prefix, id);
			   "  Error of this Agent(%04x) is reported first\n",
			id);
	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity);
}
}


void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity)
}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
EXPORT_SYMBOL_GPL(cper_severity_to_aer);


void cper_print_aer(const char *prefix, int cper_severity,
void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
		    struct aer_capability_regs *aer)
		    struct aer_capability_regs *aer)
{
{
	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity,
	}
	}
	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);
	printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
	dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
	       prefix, status, mask);
	       status, mask);
	cper_print_bits(prefix, status, status_strs, status_strs_size);
	cper_print_bits(prefix, status, status_strs, status_strs_size);
	printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
	dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
	       aer_error_layer[layer], aer_agent_string[agent]);
	       aer_error_layer[layer], aer_agent_string[agent]);
	if (aer_severity != AER_CORRECTABLE)
	if (aer_severity != AER_CORRECTABLE)
		printk("%s""aer_uncor_severity: 0x%08x\n",
		dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n",
		       prefix, aer->uncor_severity);
		       aer->uncor_severity);
	if (tlp_header_valid) {
	if (tlp_header_valid) {
		const unsigned char *tlp;
		const unsigned char *tlp;
		tlp = (const unsigned char *)&aer->header_log;
		tlp = (const unsigned char *)&aer->header_log;
		printk("%s""aer_tlp_header:"
		dev_err(&dev->dev, "aer_tlp_header:"
			" %02x%02x%02x%02x %02x%02x%02x%02x"
			" %02x%02x%02x%02x %02x%02x%02x%02x"
			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
			prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
			*(tlp + 11), *(tlp + 10), *(tlp + 9),
			*(tlp + 11), *(tlp + 10), *(tlp + 9),
			*(tlp + 8), *(tlp + 15), *(tlp + 14),
			*(tlp + 8), *(tlp + 15), *(tlp + 14),
			*(tlp + 13), *(tlp + 12));
			*(tlp + 13), *(tlp + 12));
	}
	}
	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			aer_severity);
}
}
#endif
#endif
+2 −2
Original line number Original line Diff line number Diff line
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
}
}
#endif
#endif


extern void cper_print_aer(const char *prefix, int cper_severity,
extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
			   struct aer_capability_regs *aer);
			   int cper_severity, struct aer_capability_regs *aer);
extern int cper_severity_to_aer(int cper_severity);
extern int cper_severity_to_aer(int cper_severity);
extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
			      int severity);
			      int severity);
+77 −0
Original line number Original line Diff line number Diff line
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ras

#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_AER_H

#include <linux/tracepoint.h>
#include <linux/edac.h>


/*
 * PCIe AER Trace event
 *
 * These events are generated when hardware detects a corrected or
 * uncorrected event on a PCIe device. The event report has
 * the following structure:
 *
 * char * dev_name -	The name of the slot where the device resides
 *			([domain:]bus:device.function).
 * u32 status -		Either the correctable or uncorrectable register
 *			indicating what error or errors have been seen
 * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
 */

/*
 * Flag-name table for the correctable-error status word: a comma-separated
 * list of { bit mask, name } pairs. It is passed to __print_flags() by the
 * aer_event TP_printk when the event's severity compares equal to the
 * corrected value, so each set bit in "status" is printed by name.
 *
 * NOTE(review): the bit positions presumably follow the PCIe AER
 * Correctable Error Status register layout - confirm against the spec.
 * NOTE(review): "RELAY_NUM" looks like a misspelling of "REPLAY_NUM"
 * (compare the neighbouring "Replay Timer Timeout" entry); the string is
 * emitted in trace output, so confirm no consumer depends on it before
 * changing it.
 */
#define aer_correctable_errors		\
	{BIT(0),	"Receiver Error"},		\
	{BIT(6),	"Bad TLP"},			\
	{BIT(7),	"Bad DLLP"},			\
	{BIT(8),	"RELAY_NUM Rollover"},		\
	{BIT(12),	"Replay Timer Timeout"},	\
	{BIT(13),	"Advisory Non-Fatal"}

/*
 * Flag-name table for the uncorrectable-error status word: a comma-separated
 * list of { bit mask, name } pairs. It is passed to __print_flags() by the
 * aer_event TP_printk for every event whose severity is not the corrected
 * value, so each set bit in "status" is printed by name.
 *
 * NOTE(review): the bit positions presumably follow the PCIe AER
 * Uncorrectable Error Status register layout - confirm against the spec.
 */
#define aer_uncorrectable_errors		\
	{BIT(4),	"Data Link Protocol"},		\
	{BIT(12),	"Poisoned TLP"},		\
	{BIT(13),	"Flow Control Protocol"},	\
	{BIT(14),	"Completion Timeout"},		\
	{BIT(15),	"Completer Abort"},		\
	{BIT(16),	"Unexpected Completion"},	\
	{BIT(17),	"Receiver Overflow"},		\
	{BIT(18),	"Malformed TLP"},		\
	{BIT(19),	"ECRC"},			\
	{BIT(20),	"Unsupported Request"}

/*
 * Encoding of the "severity" field recorded by aer_event. The callers hand
 * over the AER driver's severity value (0:NONFATAL 1:FATAL 2:CORRECTED),
 * not an EDAC hw_event type, so TP_printk must decode it with this
 * encoding. The EDAC HW_EVENT_ERR_* constants are numbered differently and
 * would report the wrong severity text and pick the wrong flag table.
 *
 * Plain macros are used (rather than an enum) so that the values are
 * expanded to numbers in the stringified print format.
 */
#define AER_EVENT_SEV_NONFATAL		0
#define AER_EVENT_SEV_FATAL		1
#define AER_EVENT_SEV_CORRECTED		2

/*
 * aer_event - PCIe AER error report.
 *
 * @dev_name:	name of the reporting device; copied into the event record.
 * @status:	error status bits; decoded by name with the correctable or
 *		the uncorrectable flag table depending on @severity.
 * @severity:	AER severity, see AER_EVENT_SEV_* above.
 *
 * Output format:
 *   "<dev_name> PCIe Bus Error: severity=<Corrected|Fatal|Uncorrected>, <flags>"
 */
TRACE_EVENT(aer_event,
	TP_PROTO(const char *dev_name,
		 const u32 status,
		 const u8 severity),

	TP_ARGS(dev_name, status, severity),

	TP_STRUCT__entry(
		__string(	dev_name,	dev_name	)
		__field(	u32,		status		)
		__field(	u8,		severity	)
	),

	TP_fast_assign(
		__assign_str(dev_name, dev_name);
		__entry->status		= status;
		__entry->severity	= severity;
	),

	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
		__get_str(dev_name),
		__entry->severity == AER_EVENT_SEV_CORRECTED ? "Corrected" :
			__entry->severity == AER_EVENT_SEV_FATAL ?
			"Fatal" : "Uncorrected",
		/* Corrected events carry the correctable status register. */
		__entry->severity == AER_EVENT_SEV_CORRECTED ?
		__print_flags(__entry->status, "|", aer_correctable_errors) :
		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
);

#endif /* _TRACE_AER_H */

/* This part must be outside protection */
#include <trace/define_trace.h>