Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1c111b6c authored by OGAWA Hirofumi's avatar OGAWA Hirofumi Committed by Greg Kroah-Hartman
Browse files

xhci: Fix race related to abort operation



Current abort operation has race.

    xhci_handle_command_timeout()
      xhci_abort_cmd_ring()
        xhci_write_64(CMD_RING_ABORT)
        xhci_handshake(5s)
	  do {
	    check CMD_RING_RUNNING
            udelay(1)
					 ...
					 COMP_CMD_ABORT event
					 COMP_CMD_STOP event
					 xhci_handle_stopped_cmd_ring()
					   restart cmd_ring
                                           CMD_RING_RUNNING become 1 again
	  } while ()
          return -ETIMEDOUT
        xhci_write_64(CMD_RING_ABORT)
        /* can abort random command */

To do abort operation correctly, we have to wait both of COMP_CMD_STOP
event and negation of CMD_RING_RUNNING.

But like above, while timeout handler is waiting negation of
CMD_RING_RUNNING, event handler can restart cmd_ring. So timeout
handler never be notice negation of CMD_RING_RUNNING, and retry of
CMD_RING_ABORT can abort random command (BTW, I guess retry of
CMD_RING_ABORT was workaround of this race).

To fix this race, this moves xhci_handle_stopped_cmd_ring() to
xhci_abort_cmd_ring().  And timeout handler waits COMP_CMD_STOP event.

At this point, timeout handler is owner of cmd_ring, and safely
restart cmd_ring by using xhci_handle_stopped_cmd_ring().

[FWIW, as bonus, this way would be easily extend to add CMD_RING_PAUSE
operation]

[locks edited as patch is rebased on other locking fixes -Mathias]
Signed-off-by: default avatarOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: default avatarMathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent cb4d5ce5
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -2378,6 +2378,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)

	/* init command timeout work */
	INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout);
	init_completion(&xhci->cmd_ring_stop_completion);

	page_size = readl(&xhci->op_regs->page_size);
	xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+88 −80
Original line number Diff line number Diff line
@@ -284,23 +284,71 @@ static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay)
	return mod_delayed_work(system_wq, &xhci->cmd_timer, delay);
}

static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci)
{
	return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command,
					cmd_list);
}

/*
 * Turn all commands on command ring with status set to "aborted" to no-op trbs.
 * If there are other commands waiting then restart the ring and kick the timer.
 * This must be called with command ring stopped and xhci->lock held.
 */
static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
					 struct xhci_command *cur_cmd)
{
	struct xhci_command *i_cmd;
	u32 cycle_state;

	/* Turn all aborted commands in list to no-ops, then restart */
	list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) {

		if (i_cmd->status != COMP_CMD_ABORT)
			continue;

		i_cmd->status = COMP_CMD_STOP;

		xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
			 i_cmd->command_trb);
		/* get cycle state from the original cmd trb */
		cycle_state = le32_to_cpu(
			i_cmd->command_trb->generic.field[3]) &	TRB_CYCLE;
		/* modify the command trb to no-op command */
		i_cmd->command_trb->generic.field[0] = 0;
		i_cmd->command_trb->generic.field[1] = 0;
		i_cmd->command_trb->generic.field[2] = 0;
		i_cmd->command_trb->generic.field[3] = cpu_to_le32(
			TRB_TYPE(TRB_CMD_NOOP) | cycle_state);

		/*
		 * caller waiting for completion is called when command
		 *  completion event is received for these no-op commands
		 */
	}

	xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;

	/* ring command ring doorbell to restart the command ring */
	if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
	    !(xhci->xhc_state & XHCI_STATE_DYING)) {
		xhci->current_cmd = cur_cmd;
		xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
		xhci_ring_cmd_db(xhci);
	}
}

/* Must be called with xhci->lock held, releases and aquires lock back */
static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
{
	u64 temp_64;
	int ret;

	xhci_dbg(xhci, "Abort command ring\n");

	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
	xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
	reinit_completion(&xhci->cmd_ring_stop_completion);

	/*
	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
	 * but the completion event in never sent. Use the cmd timeout timer to
	 * handle those cases. Use twice the time to cover the bit polling retry
	 */
	xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT);
	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
	xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
			&xhci->op_regs->cmd_ring);

@@ -320,17 +368,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
		udelay(1000);
		ret = xhci_handshake(&xhci->op_regs->cmd_ring,
				     CMD_RING_RUNNING, 0, 3 * 1000 * 1000);
		if (ret == 0)
			return 0;

		if (ret < 0) {
			xhci_err(xhci, "Stopped the command ring failed, "
				 "maybe the host is dead\n");
		cancel_delayed_work(&xhci->cmd_timer);
			xhci->xhc_state |= XHCI_STATE_DYING;
			xhci_halt(xhci);
			return -ESHUTDOWN;
		}

	}
	/*
	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
	 * but the completion event in never sent. Wait 2 secs (arbitrary
	 * number) to handle those cases after negation of CMD_RING_RUNNING.
	 */
	spin_unlock_irqrestore(&xhci->lock, flags);
	ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion,
					  msecs_to_jiffies(2000));
	spin_lock_irqsave(&xhci->lock, flags);
	if (!ret) {
		xhci_dbg(xhci, "No stop event for abort, ring start fail?\n");
		xhci_cleanup_command_queue(xhci);
	} else {
		xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci));
	}
	return 0;
}

@@ -1212,64 +1273,12 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
		xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT);
}

/*
 * Turn all commands on command ring with status set to "aborted" to no-op trbs.
 * If there are other commands waiting then restart the ring and kick the timer.
 * This must be called with command ring stopped and xhci->lock held.
 */
static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
					 struct xhci_command *cur_cmd)
{
	struct xhci_command *i_cmd, *tmp_cmd;
	u32 cycle_state;

	/* Turn all aborted commands in list to no-ops, then restart */
	list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list,
				 cmd_list) {

		if (i_cmd->status != COMP_CMD_ABORT)
			continue;

		i_cmd->status = COMP_CMD_STOP;

		xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
			 i_cmd->command_trb);
		/* get cycle state from the original cmd trb */
		cycle_state = le32_to_cpu(
			i_cmd->command_trb->generic.field[3]) &	TRB_CYCLE;
		/* modify the command trb to no-op command */
		i_cmd->command_trb->generic.field[0] = 0;
		i_cmd->command_trb->generic.field[1] = 0;
		i_cmd->command_trb->generic.field[2] = 0;
		i_cmd->command_trb->generic.field[3] = cpu_to_le32(
			TRB_TYPE(TRB_CMD_NOOP) | cycle_state);

		/*
		 * caller waiting for completion is called when command
		 *  completion event is received for these no-op commands
		 */
	}

	xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;

	/* ring command ring doorbell to restart the command ring */
	if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
	    !(xhci->xhc_state & XHCI_STATE_DYING)) {
		xhci->current_cmd = cur_cmd;
		xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
		xhci_ring_cmd_db(xhci);
	}
	return;
}


void xhci_handle_command_timeout(struct work_struct *work)
{
	struct xhci_hcd *xhci;
	int ret;
	unsigned long flags;
	u64 hw_ring_state;
	bool second_timeout = false;

	xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer);

@@ -1283,18 +1292,17 @@ void xhci_handle_command_timeout(struct work_struct *work)
		spin_unlock_irqrestore(&xhci->lock, flags);
		return;
	}

	/* mark this command to be cancelled */
	if (xhci->current_cmd->status == COMP_CMD_ABORT)
		second_timeout = true;
	xhci->current_cmd->status = COMP_CMD_ABORT;

	/* Make sure command ring is running before aborting it */
	hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
	if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
	    (hw_ring_state & CMD_RING_RUNNING))  {
		/* Prevent new doorbell, and start command abort */
		xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
		xhci_dbg(xhci, "Command timeout\n");
		ret = xhci_abort_cmd_ring(xhci);
		ret = xhci_abort_cmd_ring(xhci, flags);
		if (unlikely(ret == -ESHUTDOWN)) {
			xhci_err(xhci, "Abort command ring failed\n");
			xhci_cleanup_command_queue(xhci);
@@ -1308,9 +1316,9 @@ void xhci_handle_command_timeout(struct work_struct *work)
		goto time_out_completed;
	}

	/* command ring failed to restart, or host removed. Bail out */
	if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
		xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
	/* host removed. Bail out */
	if (xhci->xhc_state & XHCI_STATE_REMOVING) {
		xhci_dbg(xhci, "host removed, ring start fail?\n");
		xhci_cleanup_command_queue(xhci);

		goto time_out_completed;
@@ -1360,7 +1368,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,

	/* If CMD ring stopped we own the trbs between enqueue and dequeue */
	if (cmd_comp_code == COMP_CMD_STOP) {
		xhci_handle_stopped_cmd_ring(xhci, cmd);
		complete_all(&xhci->cmd_ring_stop_completion);
		return;
	}

+1 −0
Original line number Diff line number Diff line
@@ -1569,6 +1569,7 @@ struct xhci_hcd {
	struct list_head        cmd_list;
	unsigned int		cmd_ring_reserved_trbs;
	struct delayed_work	cmd_timer;
	struct completion	cmd_ring_stop_completion;
	struct xhci_command	*current_cmd;
	struct xhci_ring	*event_ring;
	struct xhci_erst	erst;