Commit d19c4643 authored by Mike Christie, committed by Nicholas Bellinger

target: Fix ALUA transition state race between multiple initiators



Multiple threads could be writing to alua_access_state at the same
time, there could be multiple STPGs in flight (sent by different
initiators, or by one initiator to different ports), or both could
happen at once, and the resulting core_alua_do_transition_tg_pt calls
would race with each other.

Because the previous patches no longer delay running
core_alua_do_transition_tg_pt_work, there is no longer any point in
running it from a workqueue. We also always wait for it to complete
one way or another, so we can sleep in this code path. This patch,
made over target-pending, therefore just adds a mutex and does the
work core_alua_do_transition_tg_pt_work was doing directly in
core_alua_do_transition_tg_pt.
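
In outline, the serialized path looks as follows; this is a condensed
sketch distilled from the diff below (the explicit-STPG rejection, the
optional transition delay, metadata writing and UA queuing are trimmed):

	static int core_alua_do_transition_tg_pt(struct t10_alua_tg_pt_gp *tg_pt_gp,
						 int new_state, int explicit)
	{
		mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
		if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
			/* Another transition already reached the requested state. */
			mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
			return 0;
		}

		/* Publish TRANSITION, then run the former work item body inline. */
		tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;

		/* ... transition delay, metadata update, UA queuing ... */

		tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
		return 0;
	}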

There is also no need to use an atomic for
tg_pt_gp_alua_access_state: core_alua_do_transition_tg_pt tests and
sets it under the transition mutex, and because it is an int (32 bits),
the other places that read it will never see a partially updated
value.
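
Readers outside the transition mutex then just load the int; for
example, the configfs show handler (second file in the diff below)
becomes:

	static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
			char *page)
	{
		return sprintf(page, "%d\n",
			       to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
	}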

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
parent 7a56dc88
+48 −88
@@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 		/*
 		 * Set the ASYMMETRIC ACCESS State
 		 */
-		buf[off++] |= (atomic_read(
-			&tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
+		buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
 		/*
 		 * Set supported ASYMMETRIC ACCESS State bits
 		 */
@@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
 	spin_lock(&lun->lun_tg_pt_gp_lock);
 	tg_pt_gp = lun->lun_tg_pt_gp;
-	out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+	out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
 	nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
 
 	// XXX: keeps using tg_pt_gp witout reference after unlock
@@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata(
 }
 
 /*
- * Called with tg_pt_gp->tg_pt_gp_md_mutex held
+ * Called with tg_pt_gp->tg_pt_gp_transition_mutex held
  */
 static int core_alua_update_tpg_primary_metadata(
 	struct t10_alua_tg_pt_gp *tg_pt_gp)
@@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata(
 			"alua_access_state=0x%02x\n"
 			"alua_access_status=0x%02x\n",
 			tg_pt_gp->tg_pt_gp_id,
-			tg_pt_gp->tg_pt_gp_alua_pending_state,
+			tg_pt_gp->tg_pt_gp_alua_access_state,
 			tg_pt_gp->tg_pt_gp_alua_access_status);
 
 	snprintf(path, ALUA_METADATA_PATH_LEN,
@@ -1013,93 +1012,41 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
 	spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 }
 
-static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
-{
-	struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-		struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
-	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-	bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
-			 ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
-
-	/*
-	 * Update the ALUA metadata buf that has been allocated in
-	 * core_alua_do_port_transition(), this metadata will be written
-	 * to struct file.
-	 *
-	 * Note that there is the case where we do not want to update the
-	 * metadata when the saved metadata is being parsed in userspace
-	 * when setting the existing port access state and access status.
-	 *
-	 * Also note that the failure to write out the ALUA metadata to
-	 * struct file does NOT affect the actual ALUA transition.
-	 */
-	if (tg_pt_gp->tg_pt_gp_write_metadata) {
-		mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
-		core_alua_update_tpg_primary_metadata(tg_pt_gp);
-		mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
-	}
-	/*
-	 * Set the current primary ALUA access state to the requested new state
-	 */
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-		   tg_pt_gp->tg_pt_gp_alua_pending_state);
-
-	pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
-		" from primary access state %s to %s\n", (explicit) ? "explicit" :
-		"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
-		tg_pt_gp->tg_pt_gp_id,
-		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
-		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
-
-	core_alua_queue_state_change_ua(tg_pt_gp);
-
-	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-	atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
-
-	if (tg_pt_gp->tg_pt_gp_transition_complete)
-		complete(tg_pt_gp->tg_pt_gp_transition_complete);
-}
-
 static int core_alua_do_transition_tg_pt(
 	struct t10_alua_tg_pt_gp *tg_pt_gp,
 	int new_state,
 	int explicit)
 {
-	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-	DECLARE_COMPLETION_ONSTACK(wait);
+	int prev_state;
 
+	mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	/* Nothing to be done here */
-	if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
+	if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 		return 0;
+	}
 
-	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
+	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 		return -EAGAIN;
-
-	/*
-	 * Flush any pending transitions
-	 */
-	if (!explicit)
-		flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
+	}
 
 	/*
 	 * Save the old primary ALUA access state, and set the current state
 	 * to ALUA_ACCESS_STATE_TRANSITION.
 	 */
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-			ALUA_ACCESS_STATE_TRANSITION);
+	prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
+	tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
 	tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
 				ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
 				ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
 
 	core_alua_queue_state_change_ua(tg_pt_gp);
 
-	if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+	if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 		return 0;
-
-	tg_pt_gp->tg_pt_gp_alua_previous_state =
-		atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-	tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
+	}
 
 	/*
 	 * Check for the optional ALUA primary state transition delay
@@ -1108,19 +1055,36 @@ static int core_alua_do_transition_tg_pt(
 		msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
 
 	/*
-	 * Take a reference for workqueue item
+	 * Set the current primary ALUA access state to the requested new state
 	 */
-	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-	atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
+	tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
 
-	schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
-	if (explicit) {
-		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-		wait_for_completion(&wait);
-		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
+	/*
+	 * Update the ALUA metadata buf that has been allocated in
+	 * core_alua_do_port_transition(), this metadata will be written
+	 * to struct file.
+	 *
+	 * Note that there is the case where we do not want to update the
+	 * metadata when the saved metadata is being parsed in userspace
+	 * when setting the existing port access state and access status.
+	 *
+	 * Also note that the failure to write out the ALUA metadata to
+	 * struct file does NOT affect the actual ALUA transition.
+	 */
+	if (tg_pt_gp->tg_pt_gp_write_metadata) {
+		core_alua_update_tpg_primary_metadata(tg_pt_gp);
 	}
 
+	pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
+		" from primary access state %s to %s\n", (explicit) ? "explicit" :
+		"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
+		tg_pt_gp->tg_pt_gp_id,
+		core_alua_dump_state(prev_state),
+		core_alua_dump_state(new_state));
+
+	core_alua_queue_state_change_ua(tg_pt_gp);
+
+	mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	return 0;
 }
 
@@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
 	}
 	INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
 	INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
-	mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
+	mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
 	atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-	INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-		  core_alua_do_transition_tg_pt_work);
 	tg_pt_gp->tg_pt_gp_dev = dev;
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-		ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
+	tg_pt_gp->tg_pt_gp_alua_access_state =
+			ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
 	/*
 	 * Enable both explicit and implicit ALUA support by default
 	 */
@@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp(
 	dev->t10_alua.alua_tg_pt_gps_counter--;
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-	flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
-
 	/*
 	 * Allow a struct t10_alua_tg_pt_gp_member * referenced by
 	 * core_alua_get_tg_pt_gp_by_name() in
@@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
 			"Primary Access Status: %s\nTG Port Secondary Access"
 			" State: %s\nTG Port Secondary Access Status: %s\n",
 			config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
-			core_alua_dump_state(atomic_read(
-					&tg_pt_gp->tg_pt_gp_alua_access_state)),
+			core_alua_dump_state(
+				tg_pt_gp->tg_pt_gp_alua_access_state),
 			core_alua_dump_status(
 				tg_pt_gp->tg_pt_gp_alua_access_status),
 			atomic_read(&lun->lun_tg_pt_secondary_offline) ?
+1 −1
@@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
 		char *page)
 {
 	return sprintf(page, "%d\n",
-		atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state));
+		       to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
 }
 
 static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
+2 −6
@@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp {
 	u16	tg_pt_gp_id;
 	int	tg_pt_gp_valid_id;
 	int	tg_pt_gp_alua_supported_states;
-	int	tg_pt_gp_alua_pending_state;
-	int	tg_pt_gp_alua_previous_state;
 	int	tg_pt_gp_alua_access_status;
 	int	tg_pt_gp_alua_access_type;
 	int	tg_pt_gp_nonop_delay_msecs;
@@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp {
 	int	tg_pt_gp_pref;
 	int	tg_pt_gp_write_metadata;
 	u32	tg_pt_gp_members;
-	atomic_t tg_pt_gp_alua_access_state;
+	int	tg_pt_gp_alua_access_state;
 	atomic_t tg_pt_gp_ref_cnt;
 	spinlock_t tg_pt_gp_lock;
-	struct mutex tg_pt_gp_md_mutex;
+	struct mutex tg_pt_gp_transition_mutex;
 	struct se_device *tg_pt_gp_dev;
 	struct config_group tg_pt_gp_group;
 	struct list_head tg_pt_gp_list;
 	struct list_head tg_pt_gp_lun_list;
 	struct se_lun *tg_pt_gp_alua_lun;
 	struct se_node_acl *tg_pt_gp_alua_nacl;
-	struct work_struct tg_pt_gp_transition_work;
-	struct completion *tg_pt_gp_transition_complete;
 };
 
 struct t10_vpd {