Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3d36a0df authored by Stefan Richter's avatar Stefan Richter
Browse files

firewire: keep highlevel drivers attached during brief connection loss



There are situations when nodes vanish from the bus and come back
quickly thereafter:
  - When certain bus-powered hubs are plugged in,
  - when certain devices are plugged into 6-port hubs,
  - when certain disk enclosures are switched from self-power to bus
    power or vice versa and break the daisy chain during the transition,
  - when the user plugs a cable out and quickly plugs it back in, e.g.
    to reorder a daisy chain (works on Mac OS X if done quickly enough),
  - when certain hubs temporarily malfunction during high bus traffic.

Until now, firewire-core reported affected nodes as lost to the
highlevel drivers (firewire-sbp2 and userspace drivers).  We now delay
the destruction of device representations until after at least two
seconds after the last bus reset.  If a "new" device is detected in this
period whose bus information block and root directory header match that
of a device which is pending for deletion, we resurrect that device and
send update calls to highlevel drivers.

Signed-off-by: default avatarStefan Richter <stefanr@s5r6.in-berlin.de>
parent 8cd0bbbd
Loading
Loading
Loading
Loading
+99 −22
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/idr.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/rwsem.h>
#include <linux/semaphore.h>
@@ -634,12 +635,38 @@ struct fw_device *fw_device_get_by_devt(dev_t devt)
	return device;
}

/*
 * These defines control the retry behavior for reading the config
 * rom.  It shouldn't be necessary to tweak these; if the device
 * doesn't respond to a config rom read within 10 seconds, it's not
 * going to respond at all.  As for the initial delay, a lot of
 * devices will be able to respond within half a second after bus
 * reset.  On the other hand, it's not really worth being more
 * aggressive than that, since it scales pretty well; if 10 devices
 * are plugged in, they're all getting read within one second.
 */

#define MAX_RETRIES	10
#define RETRY_DELAY	(3 * HZ)
#define INITIAL_DELAY	(HZ / 2)
#define SHUTDOWN_DELAY	(2 * HZ)

static void fw_device_shutdown(struct work_struct *work)
{
	struct fw_device *device =
		container_of(work, struct fw_device, work.work);
	int minor = MINOR(device->device.devt);

	if (time_is_after_jiffies(device->card->reset_jiffies + SHUTDOWN_DELAY)) {
		schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
		return;
	}

	if (atomic_cmpxchg(&device->state,
			   FW_DEVICE_GONE,
			   FW_DEVICE_SHUTDOWN) != FW_DEVICE_GONE)
		return;

	fw_device_cdev_remove(device);
	device_for_each_child(&device->device, NULL, shutdown_unit);
	device_unregister(&device->device);
@@ -647,6 +674,7 @@ static void fw_device_shutdown(struct work_struct *work)
	down_write(&fw_device_rwsem);
	idr_remove(&fw_device_idr, minor);
	up_write(&fw_device_rwsem);

	fw_device_put(device);
}

@@ -654,25 +682,63 @@ static struct device_type fw_device_type = {
	.release	= fw_device_release,
};

static void fw_device_update(struct work_struct *work);

/*
 * These defines control the retry behavior for reading the config
 * rom.  It shouldn't be necessary to tweak these; if the device
 * doesn't respond to a config rom read within 10 seconds, it's not
 * going to respond at all.  As for the initial delay, a lot of
 * devices will be able to respond within half a second after bus
 * reset.  On the other hand, it's not really worth being more
 * aggressive than that, since it scales pretty well; if 10 devices
 * are plugged in, they're all getting read within one second.
 * If a device was pending for deletion because its node went away but its
 * bus info block and root directory header matches that of a newly discovered
 * device, revive the existing fw_device.
 * The newly allocated fw_device becomes obsolete instead.
 */
static int lookup_existing_device(struct device *dev, void *data)
{
	struct fw_device *old = fw_device(dev);
	struct fw_device *new = data;
	struct fw_card *card = new->card;
	int match = 0;

	down_read(&fw_device_rwsem); /* serialize config_rom access */
	spin_lock_irq(&card->lock);  /* serialize node access */

	if (memcmp(old->config_rom, new->config_rom, 6 * 4) == 0 &&
	    atomic_cmpxchg(&old->state,
			   FW_DEVICE_GONE,
			   FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
		struct fw_node *current_node = new->node;
		struct fw_node *obsolete_node = old->node;

		new->node = obsolete_node;
		new->node->data = new;
		old->node = current_node;
		old->node->data = old;

		old->max_speed = new->max_speed;
		old->node_id = current_node->node_id;
		smp_wmb();  /* update node_id before generation */
		old->generation = card->generation;
		old->config_rom_retries = 0;
		fw_notify("rediscovered device %s\n", dev_name(dev));

#define MAX_RETRIES	10
#define RETRY_DELAY	(3 * HZ)
#define INITIAL_DELAY	(HZ / 2)
		PREPARE_DELAYED_WORK(&old->work, fw_device_update);
		schedule_delayed_work(&old->work, 0);

		if (current_node == card->root_node)
			fw_schedule_bm_work(card, 0);

		match = 1;
	}

	spin_unlock_irq(&card->lock);
	up_read(&fw_device_rwsem);

	return match;
}

static void fw_device_init(struct work_struct *work)
{
	struct fw_device *device =
		container_of(work, struct fw_device, work.work);
	struct device *revived_dev;
	int minor, err;

	/*
@@ -696,6 +762,15 @@ static void fw_device_init(struct work_struct *work)
		return;
	}

	revived_dev = device_find_child(device->card->device,
					device, lookup_existing_device);
	if (revived_dev) {
		put_device(revived_dev);
		fw_device_release(&device->device);

		return;
	}

	device_initialize(&device->device);

	fw_device_get(device);
@@ -735,8 +810,9 @@ static void fw_device_init(struct work_struct *work)
	 */
	if (atomic_cmpxchg(&device->state,
			   FW_DEVICE_INITIALIZING,
		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) {
		fw_device_shutdown(work);
			   FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
		PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
		schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
	} else {
		if (device->config_rom_retries)
			fw_notify("created device %s: GUID %08x%08x, S%d00, "
@@ -848,7 +924,7 @@ static void fw_device_refresh(struct work_struct *work)
	case REREAD_BIB_UNCHANGED:
		if (atomic_cmpxchg(&device->state,
				   FW_DEVICE_INITIALIZING,
			    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
				   FW_DEVICE_RUNNING) == FW_DEVICE_GONE)
			goto gone;

		fw_device_update(work);
@@ -880,7 +956,7 @@ static void fw_device_refresh(struct work_struct *work)

	if (atomic_cmpxchg(&device->state,
			   FW_DEVICE_INITIALIZING,
		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
			   FW_DEVICE_RUNNING) == FW_DEVICE_GONE)
		goto gone;

	fw_notify("refreshed device %s\n", dev_name(&device->device));
@@ -890,8 +966,9 @@ static void fw_device_refresh(struct work_struct *work)
 give_up:
	fw_notify("giving up on refresh of device %s\n", dev_name(&device->device));
 gone:
	atomic_set(&device->state, FW_DEVICE_SHUTDOWN);
	fw_device_shutdown(work);
	atomic_set(&device->state, FW_DEVICE_GONE);
	PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
	schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
 out:
	if (node_id == card->root_node->node_id)
		fw_schedule_bm_work(card, 0);
@@ -995,9 +1072,9 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
		 */
		device = node->data;
		if (atomic_xchg(&device->state,
				FW_DEVICE_SHUTDOWN) == FW_DEVICE_RUNNING) {
				FW_DEVICE_GONE) == FW_DEVICE_RUNNING) {
			PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
			schedule_delayed_work(&device->work, 0);
			schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
		}
		break;
	}
+1 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
enum fw_device_state {
	FW_DEVICE_INITIALIZING,
	FW_DEVICE_RUNNING,
	FW_DEVICE_GONE,
	FW_DEVICE_SHUTDOWN,
};