Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 750c721e authored by David S. Miller
Browse files

Merge branch 'devlink-Add-support-for-region-access'



Alex Vesker says:

====================
devlink: Add support for region access

This is a proposal which will allow access to driver defined address
regions using devlink. Each device can create its supported address
regions and register them. A device which exposes a region will allow
access to it using devlink.

The suggested implementation will allow exposing regions to the user,
reading and dumping snapshots taken from different regions.
A snapshot represents a memory image of a region taken by the driver.

If a device collects a snapshot of an address region it can be later
exposed using devlink region read or dump commands.
This functionality allows for future analyses on the snapshots to be
done.

The major benefit of this support is not only to provide access to
internal address regions which were inaccessible to the user but also
to provide an additional way to debug complex error states using the
region snapshots.

Implemented commands:
$ devlink region help
$ devlink region show [ DEV/REGION ]
$ devlink region del DEV/REGION snapshot SNAPSHOT_ID
$ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ]
$ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ]
	address ADDRESS length LENGTH

Show all of the exposed regions with region sizes:
$ devlink region show
pci/0000:00:05.0/cr-space: size 1048576 snapshot [1 2]
pci/0000:00:05.0/fw-health: size 64 snapshot [1 2]

Delete a snapshot using:
$ devlink region del pci/0000:00:05.0/cr-space snapshot 1

Dump a snapshot:
$ devlink region dump pci/0000:00:05.0/fw-health snapshot 1
0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8
0000000000000020 0016 0bb8 0016 1720 0000 0000 c00f 3ffc
0000000000000030 bada cce5 bada cce5 bada cce5 bada cce5

Read a specific part of a snapshot:
$ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0
	length 16
0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30

For more information you can check devlink-region.8 man page

Future:
There is a plan to extend the support to include a write command,
as well as read and dump of live regions.

v1->v2:
-Add a parameter to enable devlink region snapshot
-Allocate snapshot memory using kvmalloc
-Introduce destructor function devlink_snapshot_data_dest_t to avoid
 double allocation

v2->v3:
-Fix incorrect comment in devlink.h for DEVLINK_ATTR_REGION_SIZE
 from u32 to u64
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 23c9ef2b 3c641ba4
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX4_CORE) += mlx4_core.o

mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
		main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
		srq.o resource_tracker.o
		srq.o resource_tracker.o crdump.o

obj-$(CONFIG_MLX4_EN)               += mlx4_en.o

+4 −2
Original line number Diff line number Diff line
@@ -178,10 +178,12 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)

	dev = persist->dev;
	mlx4_err(dev, "device is going to be reset\n");
	if (mlx4_is_slave(dev))
	if (mlx4_is_slave(dev)) {
		err = mlx4_reset_slave(dev);
	else
	} else {
		mlx4_crdump_collect(dev);
		err = mlx4_reset_master(dev);
	}

	if (!err) {
		mlx4_err(dev, "device was reset successfully\n");
+239 −0
Original line number Diff line number Diff line
/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "mlx4.h"

/*
 * NOTE(review): BAD_ACCESS is not referenced anywhere in this file;
 * presumably it is the pattern read back from blocked cr-space
 * locations - confirm before relying on it.
 */
#define BAD_ACCESS			0xBADACCE5
/*
 * Number of bytes copied into a fw-health snapshot.  The same value is also
 * used as the offset, from the start of the health buffer, of the register
 * written to block/unblock volatile cr-space accesses.
 */
#define HEALTH_BUFFER_SIZE		0x40
/* Bit 6 of the register at CR_ENABLE_BIT_OFFSET, byte-swapped via swab32() */
#define CR_ENABLE_BIT			swab32(BIT(6))
/* Byte offset, from the mapped cr-space base, of the register holding CR_ENABLE_BIT */
#define CR_ENABLE_BIT_OFFSET		0xF3F04
/* Passed to devlink_region_create() for both regions as the snapshot limit */
#define MAX_NUM_OF_DUMPS_TO_STORE	(8)

/* devlink region names; also used in this file's log messages */
static const char *region_cr_space_str = "cr-space";
static const char *region_fw_health_str = "fw-health";

/*
 * Remembers whether CR_ENABLE_BIT was set when cr-space access was enabled
 * for a dump, so that the disable path can restore it afterwards.
 * This is a single file-scope flag, i.e. it is shared by every device
 * handled by the driver.  ("enbale" is a misspelling of "enable"; the name
 * is kept because both access helpers reference it.)
 */
static bool crdump_enbale_bit_set;

/*
 * Prepare the mapped cr-space for dumping.
 *
 * @dev:      device whose health buffer address locates the block register
 * @cr_space: ioremap()ed base of the cr-space
 *
 * Records the current state of CR_ENABLE_BIT in crdump_enbale_bit_set,
 * clears the bit if it was set, and then writes swab32(1) to the word
 * that follows the health buffer to block volatile cr-space accesses.
 */
static void crdump_enable_crspace_access(struct mlx4_dev *dev,
					 u8 __iomem *cr_space)
{
	u8 __iomem *filter_reg = cr_space + CR_ENABLE_BIT_OFFSET;
	u8 __iomem *block_reg = cr_space + dev->caps.health_buffer_addrs +
				HEALTH_BUFFER_SIZE;

	/* Remember the original bit so the disable path can put it back */
	crdump_enbale_bit_set = readl(filter_reg) & CR_ENABLE_BIT;

	/* FW CR filter is enabled by having bit 6 cleared */
	if (crdump_enbale_bit_set) {
		writel(readl(filter_reg) & ~CR_ENABLE_BIT, filter_reg);
	}

	/* Turn on blocking of volatile crspace accesses */
	writel(swab32(1), block_reg);
}

/*
 * Undo what was set up for dumping the mapped cr-space.
 *
 * @dev:      device whose health buffer address locates the block register
 * @cr_space: ioremap()ed base of the cr-space
 *
 * Writes 0 to the word that follows the health buffer to stop blocking
 * volatile cr-space accesses, then sets CR_ENABLE_BIT again if
 * crdump_enbale_bit_set says it was set before the dump.
 */
static void crdump_disable_crspace_access(struct mlx4_dev *dev,
					  u8 __iomem *cr_space)
{
	u8 __iomem *filter_reg = cr_space + CR_ENABLE_BIT_OFFSET;
	u8 __iomem *block_reg = cr_space + dev->caps.health_buffer_addrs +
				HEALTH_BUFFER_SIZE;

	/* Turn off blocking of volatile crspace accesses */
	writel(0, block_reg);

	/* Nothing to restore when bit 6 was already clear before the dump */
	if (!crdump_enbale_bit_set)
		return;

	/* Put the FW CR filter bit back to its original (set) value */
	writel(readl(filter_reg) | CR_ENABLE_BIT, filter_reg);
}

/*
 * Copy the whole of PCI BAR 0 (the cr-space) into a newly allocated buffer
 * and register it with devlink as snapshot @id of the cr-space region.
 *
 * @dev:      device being dumped
 * @cr_space: ioremap()ed base of BAR 0
 * @id:       snapshot id to register the data under
 *
 * Best effort: every failure is logged and the function just returns.
 * The buffer is handed to devlink together with kvfree() as its destructor;
 * if devlink rejects the snapshot the buffer is freed here instead.
 */
static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
					u8 __iomem *cr_space,
					u32 id)
{
	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
	struct pci_dev *pdev = dev->persist->pdev;
	unsigned long cr_res_size;
	unsigned long offset;
	u8 *crspace_data;
	int err;

	/*
	 * The region pointer is not usable when region creation failed:
	 * it may be NULL or still hold an ERR_PTR() value.  Either way it
	 * must never be passed on to devlink.
	 */
	if (IS_ERR_OR_NULL(crdump->region_crspace)) {
		mlx4_err(dev, "crdump: cr-space region is NULL\n");
		return;
	}

	/* Try to collect CR space */
	cr_res_size = pci_resource_len(pdev, 0);
	crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
	if (!crspace_data) {
		mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
		return;
	}

	/* Read the BAR one 32-bit word at a time */
	for (offset = 0; offset < cr_res_size; offset += 4)
		*(u32 *)(crspace_data + offset) = readl(cr_space + offset);

	err = devlink_region_snapshot_create(crdump->region_crspace,
					     cr_res_size, crspace_data,
					     id, &kvfree);
	if (err) {
		/* devlink did not take the buffer, so release it ourselves */
		kvfree(crspace_data);
		mlx4_warn(dev, "crdump: devlink create %s snapshot id %u err %d\n",
			  region_cr_space_str, id, err);
		return;
	}

	mlx4_info(dev, "crdump: added snapshot %u to devlink region %s\n",
		  id, region_cr_space_str);
}

/*
 * Copy the HEALTH_BUFFER_SIZE bytes of the firmware health buffer into a
 * newly allocated buffer and register it with devlink as snapshot @id of
 * the fw-health region.
 *
 * @dev:      device being dumped; dev->caps.health_buffer_addrs gives the
 *            offset of the health buffer inside the cr-space
 * @cr_space: ioremap()ed base of the cr-space
 * @id:       snapshot id to register the data under
 *
 * Best effort: every failure is logged and the function just returns.
 * The buffer is handed to devlink together with kvfree() as its destructor;
 * if devlink rejects the snapshot the buffer is freed here instead.
 */
static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
					  u8 __iomem *cr_space,
					  u32 id)
{
	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
	u8 __iomem *health_buf_start;
	u8 *health_data;
	int offset;
	int err;

	/*
	 * The region pointer is not usable when region creation failed:
	 * it may be NULL or still hold an ERR_PTR() value.  Either way it
	 * must never be passed on to devlink.
	 */
	if (IS_ERR_OR_NULL(crdump->region_fw_health)) {
		mlx4_err(dev, "crdump: fw-health region is NULL\n");
		return;
	}

	/* Try to collect health buffer */
	health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
	if (!health_data) {
		mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
		return;
	}

	/* Read the health buffer one 32-bit word at a time */
	health_buf_start = cr_space + dev->caps.health_buffer_addrs;
	for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
		*(u32 *)(health_data + offset) =
				readl(health_buf_start + offset);

	err = devlink_region_snapshot_create(crdump->region_fw_health,
					     HEALTH_BUFFER_SIZE,
					     health_data,
					     id, &kvfree);
	if (err) {
		/* devlink did not take the buffer, so release it ourselves */
		kvfree(health_data);
		mlx4_warn(dev, "crdump: devlink create %s snapshot id %u err %d\n",
			  region_fw_health_str, id, err);
		return;
	}

	mlx4_info(dev, "crdump: added snapshot %u to devlink region %s\n",
		  id, region_fw_health_str);
}

int mlx4_crdump_collect(struct mlx4_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
	struct pci_dev *pdev = dev->persist->pdev;
	unsigned long cr_res_size;
	u8 __iomem *cr_space;
	u32 id;

	if (!dev->caps.health_buffer_addrs) {
		mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
		return 0;
	}

	if (!crdump->snapshot_enable) {
		mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
		return 0;
	}

	cr_res_size = pci_resource_len(pdev, 0);

	cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
	if (!cr_space) {
		mlx4_err(dev, "crdump: Failed to map pci cr region\n");
		return -ENODEV;
	}

	crdump_enable_crspace_access(dev, cr_space);

	/* Get the available snapshot ID for the dumps */
	id = devlink_region_shapshot_id_get(devlink);

	/* Try to capture dumps */
	mlx4_crdump_collect_crspace(dev, cr_space, id);
	mlx4_crdump_collect_fw_health(dev, cr_space, id);

	crdump_disable_crspace_access(dev, cr_space);

	iounmap(cr_space);
	return 0;
}

int mlx4_crdump_init(struct mlx4_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
	struct pci_dev *pdev = dev->persist->pdev;

	crdump->snapshot_enable = false;

	/* Create cr-space region */
	crdump->region_crspace =
		devlink_region_create(devlink,
				      region_cr_space_str,
				      MAX_NUM_OF_DUMPS_TO_STORE,
				      pci_resource_len(pdev, 0));
	if (IS_ERR(crdump->region_crspace))
		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
			  region_cr_space_str,
			  PTR_ERR(crdump->region_crspace));

	/* Create fw-health region */
	crdump->region_fw_health =
		devlink_region_create(devlink,
				      region_fw_health_str,
				      MAX_NUM_OF_DUMPS_TO_STORE,
				      HEALTH_BUFFER_SIZE);
	if (IS_ERR(crdump->region_fw_health))
		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
			  region_fw_health_str,
			  PTR_ERR(crdump->region_fw_health));

	return 0;
}

void mlx4_crdump_end(struct mlx4_dev *dev)
{
	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;

	devlink_region_destroy(crdump->region_fw_health);
	devlink_region_destroy(crdump->region_crspace);
}
+4 −1
Original line number Diff line number Diff line
@@ -825,7 +825,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET	0xcc
#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET	0xd0
#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET	0xd2

#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET	0xe4

	dev_cap->flags2 = 0;
	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -1082,6 +1082,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
		dev_cap->rl_caps.min_unit = size >> 14;
	}

	MLX4_GET(dev_cap->health_buffer_addrs, outbox,
		 QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET);

	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
	if (field32 & (1 << 16))
		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
+1 −0
Original line number Diff line number Diff line
@@ -128,6 +128,7 @@ struct mlx4_dev_cap {
	u32 dmfs_high_rate_qpn_base;
	u32 dmfs_high_rate_qpn_range;
	struct mlx4_rate_limit_caps rl_caps;
	u32 health_buffer_addrs;
	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
	bool wol_port[MLX4_MAX_PORTS + 1];
};
Loading