Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 209fae14 authored by Dan Williams's avatar Dan Williams
Browse files

isci: atomic device lookup and reference counting



We have unsafe references to remote devices that are notified to
disappear at lldd_dev_gone.  In order to clean this up we need a single
canonical source for device lookups and stable references once a lookup
succeeds.  Towards that end guarantee that domain_device.lldd_dev is
NULL as soon as we start the process of stopping a device.  Any code
path that wants to safely lookup a remote device must do so through
task->dev->lldd_dev (isci_lookup_device()).

For in-flight references outside of scic_lock we need reference counting
to ensure that the device is not recycled before we are done with it.
Simplify device back references to just scic_sds_request.target_device
which is now the only permissible internal reference that is maintained
relative to the reference count.

There were two occasions where we wanted new i/o's to be treated as
SAS_TASK_UNDELIVERED but where the domain_dev->lldd_dev link is still
intact.  Introduce a 'gone' flag to prevent i/o while waiting for libsas
to take action on the port down event.

One 'core' leftover is that we currently call
scic_remote_device_destruct() from isci_remote_device_deconstruct()
which is called when the 'core' says the device is stopped.  It would be
more natural for the final put to trigger
isci_remote_device_deconstruct() but this implementation is deferred as
it requires other changes.

Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 360b03ed
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -1327,7 +1327,7 @@ void isci_host_deinit(struct isci_host *ihost)
		struct isci_remote_device *idev, *d;

		list_for_each_entry_safe(idev, d, &iport->remote_dev_list, node) {
			isci_remote_device_change_state(idev, isci_stopping);
			if (test_bit(IDEV_ALLOCATED, &idev->flags))
				isci_remote_device_stop(ihost, idev);
		}
	}
+1 −2
Original line number Diff line number Diff line
@@ -321,8 +321,7 @@ static void isci_port_link_down(struct isci_host *isci_host,
				dev_dbg(&isci_host->pdev->dev,
					"%s: isci_device = %p\n",
					__func__, isci_device);
				isci_remote_device_change_state(isci_device,
								isci_stopping);
				set_bit(IDEV_GONE, &isci_device->flags);
			}
		}
		isci_port_change_state(isci_port, isci_stopping);
+41 −39
Original line number Diff line number Diff line
@@ -94,7 +94,7 @@ static void isci_remote_device_not_ready(struct isci_host *ihost,
		"%s: isci_device = %p\n", __func__, idev);

	if (reason == SCIC_REMOTE_DEVICE_NOT_READY_STOP_REQUESTED)
		isci_remote_device_change_state(idev, isci_stopping);
		set_bit(IDEV_GONE, &idev->flags);
	else
		/* device ready is actually a "not ready for io" state. */
		isci_remote_device_change_state(idev, isci_ready);
@@ -449,9 +449,11 @@ static void scic_sds_remote_device_start_request(struct scic_sds_remote_device *
	/* cleanup requests that failed after starting on the port */
	if (status != SCI_SUCCESS)
		scic_sds_port_complete_io(sci_port, sci_dev, sci_req);
	else
	else {
		kref_get(&sci_dev_to_idev(sci_dev)->kref);
		scic_sds_remote_device_increment_request_count(sci_dev);
	}
}

enum sci_status scic_sds_remote_device_start_io(struct scic_sds_controller *scic,
						struct scic_sds_remote_device *sci_dev,
@@ -656,6 +658,8 @@ enum sci_status scic_sds_remote_device_complete_io(struct scic_sds_controller *s
			"%s: Port:0x%p Device:0x%p Request:0x%p Status:0x%x "
			"could not complete\n", __func__, sci_port,
			sci_dev, sci_req, status);
	else
		isci_put_device(sci_dev_to_idev(sci_dev));

	return status;
}
@@ -860,23 +864,11 @@ static void isci_remote_device_deconstruct(struct isci_host *ihost, struct isci_
	 * here should go through isci_remote_device_nuke_requests.
	 * If we hit this condition, we will need a way to complete
	 * io requests in process */
	while (!list_empty(&idev->reqs_in_process)) {

		dev_err(&ihost->pdev->dev,
			"%s: ** request list not empty! **\n", __func__);
		BUG();
	}
	BUG_ON(!list_empty(&idev->reqs_in_process));

	scic_remote_device_destruct(&idev->sci);
	idev->domain_dev->lldd_dev = NULL;
	idev->domain_dev = NULL;
	idev->isci_port = NULL;
	list_del_init(&idev->node);

	clear_bit(IDEV_START_PENDING, &idev->flags);
	clear_bit(IDEV_STOP_PENDING, &idev->flags);
	clear_bit(IDEV_EH, &idev->flags);
	wake_up(&ihost->eventq);
	isci_put_device(idev);
}

/**
@@ -1314,6 +1306,22 @@ isci_remote_device_alloc(struct isci_host *ihost, struct isci_port *iport)
	return idev;
}

void isci_remote_device_release(struct kref *kref)
{
	struct isci_remote_device *idev = container_of(kref, typeof(*idev), kref);
	struct isci_host *ihost = idev->isci_port->isci_host;

	idev->domain_dev = NULL;
	idev->isci_port = NULL;
	clear_bit(IDEV_START_PENDING, &idev->flags);
	clear_bit(IDEV_STOP_PENDING, &idev->flags);
	clear_bit(IDEV_GONE, &idev->flags);
	clear_bit(IDEV_EH, &idev->flags);
	smp_mb__before_clear_bit();
	clear_bit(IDEV_ALLOCATED, &idev->flags);
	wake_up(&ihost->eventq);
}

/**
 * isci_remote_device_stop() - This function is called internally to stop the
 *    remote device.
@@ -1330,7 +1338,11 @@ enum sci_status isci_remote_device_stop(struct isci_host *ihost, struct isci_rem
	dev_dbg(&ihost->pdev->dev,
		"%s: isci_device = %p\n", __func__, idev);

	spin_lock_irqsave(&ihost->scic_lock, flags);
	idev->domain_dev->lldd_dev = NULL; /* disable new lookups */
	set_bit(IDEV_GONE, &idev->flags);
	isci_remote_device_change_state(idev, isci_stopping);
	spin_unlock_irqrestore(&ihost->scic_lock, flags);

	/* Kill all outstanding requests. */
	isci_remote_device_nuke_requests(ihost, idev);
@@ -1342,14 +1354,10 @@ enum sci_status isci_remote_device_stop(struct isci_host *ihost, struct isci_rem
	spin_unlock_irqrestore(&ihost->scic_lock, flags);

	/* Wait for the stop complete callback. */
	if (status == SCI_SUCCESS) {
	if (WARN_ONCE(status != SCI_SUCCESS, "failed to stop device\n"))
		/* nothing to wait for */;
	else
		wait_for_device_stop(ihost, idev);
		clear_bit(IDEV_ALLOCATED, &idev->flags);
	}

	dev_dbg(&ihost->pdev->dev,
		"%s: idev = %p - after completion wait\n",
		__func__, idev);

	return status;
}
@@ -1416,39 +1424,33 @@ int isci_remote_device_found(struct domain_device *domain_dev)
	if (!isci_device)
		return -ENODEV;

	kref_init(&isci_device->kref);
	INIT_LIST_HEAD(&isci_device->node);
	domain_dev->lldd_dev = isci_device;

	spin_lock_irq(&isci_host->scic_lock);
	isci_device->domain_dev = domain_dev;
	isci_device->isci_port = isci_port;
	isci_remote_device_change_state(isci_device, isci_starting);


	spin_lock_irq(&isci_host->scic_lock);
	list_add_tail(&isci_device->node, &isci_port->remote_dev_list);

	set_bit(IDEV_START_PENDING, &isci_device->flags);
	status = isci_remote_device_construct(isci_port, isci_device);
	spin_unlock_irq(&isci_host->scic_lock);

	dev_dbg(&isci_host->pdev->dev,
		"%s: isci_device = %p\n",
		__func__, isci_device);

	if (status != SCI_SUCCESS) {

		spin_lock_irq(&isci_host->scic_lock);
		isci_remote_device_deconstruct(
			isci_host,
			isci_device
			);
	if (status == SCI_SUCCESS) {
		/* device came up, advertise it to the world */
		domain_dev->lldd_dev = isci_device;
	} else
		isci_put_device(isci_device);
	spin_unlock_irq(&isci_host->scic_lock);
		return -ENODEV;
	}

	/* wait for the device ready callback. */
	wait_for_device_start(isci_host, isci_device);

	return 0;
	return status == SCI_SUCCESS ? 0 : -ENODEV;
}
/**
 * isci_device_is_reset_pending() - This function will check if there is any
+23 −0
Original line number Diff line number Diff line
@@ -56,6 +56,7 @@
#ifndef _ISCI_REMOTE_DEVICE_H_
#define _ISCI_REMOTE_DEVICE_H_
#include <scsi/libsas.h>
#include <linux/kref.h>
#include "scu_remote_node_context.h"
#include "remote_node_context.h"
#include "port.h"
@@ -134,7 +135,9 @@ struct isci_remote_device {
	#define IDEV_STOP_PENDING 1
	#define IDEV_ALLOCATED 2
	#define IDEV_EH 3
	#define IDEV_GONE 4
	unsigned long flags;
	struct kref kref;
	struct isci_port *isci_port;
	struct domain_device *domain_dev;
	struct list_head node;
@@ -145,6 +148,26 @@ struct isci_remote_device {

#define ISCI_REMOTE_DEVICE_START_TIMEOUT 5000

/* device reference routines must be called under scic_lock */
static inline struct isci_remote_device *isci_lookup_device(struct domain_device *dev)
{
	struct isci_remote_device *idev = dev->lldd_dev;

	if (idev && !test_bit(IDEV_GONE, &idev->flags)) {
		kref_get(&idev->kref);
		return idev;
	}

	return NULL;
}

void isci_remote_device_release(struct kref *kref);
static inline void isci_put_device(struct isci_remote_device *idev)
{
	if (idev)
		kref_put(&idev->kref, isci_remote_device_release);
}

enum sci_status isci_remote_device_stop(struct isci_host *ihost,
					struct isci_remote_device *idev);
void isci_remote_device_nuke_requests(struct isci_host *ihost,
+21 −39
Original line number Diff line number Diff line
@@ -2313,7 +2313,7 @@ static void isci_request_set_open_reject_status(
 * none.
 */
static void isci_request_handle_controller_specific_errors(
	struct isci_remote_device *isci_device,
	struct isci_remote_device *idev,
	struct isci_request *request,
	struct sas_task *task,
	enum service_response *response_ptr,
@@ -2353,8 +2353,7 @@ static void isci_request_handle_controller_specific_errors(
			 * that we ignore the quiesce state, since we are
			 * concerned about the actual device state.
			 */
			if ((isci_device->status == isci_stopping) ||
			    (isci_device->status == isci_stopped))
			if (!idev)
				*status_ptr = SAS_DEVICE_UNKNOWN;
			else
				*status_ptr = SAS_ABORTED_TASK;
@@ -2367,8 +2366,7 @@ static void isci_request_handle_controller_specific_errors(
			/* Task in the target is not done. */
			*response_ptr = SAS_TASK_UNDELIVERED;

			if ((isci_device->status == isci_stopping) ||
			    (isci_device->status == isci_stopped))
			if (!idev)
				*status_ptr = SAS_DEVICE_UNKNOWN;
			else
				*status_ptr = SAM_STAT_TASK_ABORTED;
@@ -2399,8 +2397,7 @@ static void isci_request_handle_controller_specific_errors(
		 * that we ignore the quiesce state, since we are
		 * concerned about the actual device state.
		 */
		if ((isci_device->status == isci_stopping) ||
		    (isci_device->status == isci_stopped))
		if (!idev)
			*status_ptr = SAS_DEVICE_UNKNOWN;
		else
			*status_ptr = SAS_ABORTED_TASK;
@@ -2629,7 +2626,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
	struct ssp_response_iu *resp_iu;
	void *resp_buf;
	unsigned long task_flags;
	struct isci_remote_device *isci_device   = request->isci_device;
	struct isci_remote_device *idev = isci_lookup_device(task->dev);
	enum service_response response       = SAS_TASK_UNDELIVERED;
	enum exec_status status         = SAS_ABORTED_TASK;
	enum isci_request_status request_status;
@@ -2672,9 +2669,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
		 * that we ignore the quiesce state, since we are
		 * concerned about the actual device state.
		 */
		if ((isci_device->status == isci_stopping)
		    || (isci_device->status == isci_stopped)
		    )
		if (!idev)
			status = SAS_DEVICE_UNKNOWN;
		else
			status = SAS_ABORTED_TASK;
@@ -2697,8 +2692,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
		request->complete_in_target = true;
		response = SAS_TASK_UNDELIVERED;

		if ((isci_device->status == isci_stopping) ||
		    (isci_device->status == isci_stopped))
		if (!idev)
			/* The device has been /is being stopped. Note that
			 * we ignore the quiesce state, since we are
			 * concerned about the actual device state.
@@ -2728,8 +2722,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
		 * that we ignore the quiesce state, since we are
		 * concerned about the actual device state.
		 */
		if ((isci_device->status == isci_stopping) ||
		    (isci_device->status == isci_stopped))
		if (!idev)
			status = SAS_DEVICE_UNKNOWN;
		else
			status = SAS_ABORTED_TASK;
@@ -2861,8 +2854,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
			 * that we ignore the quiesce state, since we are
			 * concerned about the actual device state.
			 */
			if ((isci_device->status == isci_stopping) ||
			    (isci_device->status == isci_stopped))
			if (!idev)
				status = SAS_DEVICE_UNKNOWN;
			else
				status = SAS_ABORTED_TASK;
@@ -2873,7 +2865,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
		case SCI_FAILURE_CONTROLLER_SPECIFIC_IO_ERR:

			isci_request_handle_controller_specific_errors(
				isci_device, request, task, &response, &status,
				idev, request, task, &response, &status,
				&complete_to_host);

			break;
@@ -2902,8 +2894,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,

			/* Fail the I/O so it can be retried. */
			response = SAS_TASK_UNDELIVERED;
			if ((isci_device->status == isci_stopping) ||
			    (isci_device->status == isci_stopped))
			if (!idev)
				status = SAS_DEVICE_UNKNOWN;
			else
				status = SAS_ABORTED_TASK;
@@ -2926,8 +2917,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
			 * that we ignore the quiesce state, since we are
			 * concerned about the actual device state.
			 */
			if ((isci_device->status == isci_stopping) ||
			    (isci_device->status == isci_stopped))
			if (!idev)
				status = SAS_DEVICE_UNKNOWN;
			else
				status = SAS_ABORTED_TASK;
@@ -2953,8 +2943,10 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,

	/* complete the io request to the core. */
	scic_controller_complete_io(&isci_host->sci,
				    &isci_device->sci,
				    request->sci.target_device,
				    &request->sci);
	isci_put_device(idev);

	/* set terminated handle so it cannot be completed or
	 * terminated again, and to cause any calls into abort
	 * task to recognize the already completed case.
@@ -3511,7 +3503,6 @@ static enum sci_status isci_io_request_build(
}

static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,
						    struct isci_remote_device *idev,
						    gfp_t gfp_flags)
{
	dma_addr_t handle;
@@ -3528,7 +3519,6 @@ static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,
	spin_lock_init(&ireq->state_lock);
	ireq->request_daddr = handle;
	ireq->isci_host = ihost;
	ireq->isci_device = idev;
	ireq->io_request_completion = NULL;
	ireq->terminated = false;

@@ -3546,12 +3536,11 @@ static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,

static struct isci_request *isci_request_alloc_io(struct isci_host *ihost,
						  struct sas_task *task,
						  struct isci_remote_device *idev,
						  gfp_t gfp_flags)
{
	struct isci_request *ireq;

	ireq = isci_request_alloc_core(ihost, idev, gfp_flags);
	ireq = isci_request_alloc_core(ihost, gfp_flags);
	if (ireq) {
		ireq->ttype_ptr.io_task_ptr = task;
		ireq->ttype = io_task;
@@ -3562,12 +3551,11 @@ static struct isci_request *isci_request_alloc_io(struct isci_host *ihost,

struct isci_request *isci_request_alloc_tmf(struct isci_host *ihost,
					    struct isci_tmf *isci_tmf,
					    struct isci_remote_device *idev,
					    gfp_t gfp_flags)
{
	struct isci_request *ireq;

	ireq = isci_request_alloc_core(ihost, idev, gfp_flags);
	ireq = isci_request_alloc_core(ihost, gfp_flags);
	if (ireq) {
		ireq->ttype_ptr.tmf_task_ptr = isci_tmf;
		ireq->ttype = tmf_task;
@@ -3575,21 +3563,16 @@ struct isci_request *isci_request_alloc_tmf(struct isci_host *ihost,
	return ireq;
}

int isci_request_execute(struct isci_host *ihost, struct sas_task *task,
			 gfp_t gfp_flags)
int isci_request_execute(struct isci_host *ihost, struct isci_remote_device *idev,
			 struct sas_task *task, gfp_t gfp_flags)
{
	enum sci_status status = SCI_FAILURE_UNSUPPORTED_PROTOCOL;
	struct scic_sds_remote_device *sci_dev;
	struct isci_remote_device *idev;
	struct isci_request *ireq;
	unsigned long flags;
	int ret = 0;

	idev = task->dev->lldd_dev;
	sci_dev = &idev->sci;

	/* do common allocation and init of request object. */
	ireq = isci_request_alloc_io(ihost, task, idev, gfp_flags);
	ireq = isci_request_alloc_io(ihost, task, gfp_flags);
	if (!ireq)
		goto out;

@@ -3605,8 +3588,7 @@ int isci_request_execute(struct isci_host *ihost, struct sas_task *task,
	spin_lock_irqsave(&ihost->scic_lock, flags);

	/* send the request, let the core assign the IO TAG.	*/
	status = scic_controller_start_io(&ihost->sci, sci_dev,
					  &ireq->sci,
	status = scic_controller_start_io(&ihost->sci, &idev->sci, &ireq->sci,
					  SCI_CONTROLLER_INVALID_IO_TAG);
	if (status != SCI_SUCCESS &&
	    status != SCI_FAILURE_REMOTE_DEVICE_RESET_REQUIRED) {
Loading