Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c992cf65 authored by Arun KS, committed by Kyle Yan
Browse files

soc: qcom: ssr: Add option to relax on ssr failures



SSR failures are considered fatal and result in a system panic.
In certain scenarios the system can continue to work even with a
failed subsystem.

Add an option in the subsystem descriptor to tolerate SSR failures.

Change-Id: I86dcaa615d6443937077880d9a91070d9c22ea1f
Signed-off-by: Arun KS <arunks@codeaurora.org>
[satyap@codeaurora.org: trivial merge conflict resolution]
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
Signed-off-by: Avaneesh Kumar Dwivedi <akdwived@codeaurora.org>
parent f48b62b7
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ Optional properties:
- qcom,complete-ramdump: Boolean. If set, complete ramdump i.e. region between start address of
			first segment to end address of last segment will be collected without
			leaving any hole in between.
- qcom,ignore-ssr-failure: Boolean. If set, SSR failures are not considered fatal.

Example:
	qcom,venus@fdce0000 {
+57 −17
Original line number Diff line number Diff line
@@ -481,17 +481,21 @@ static void send_sysmon_notif(struct subsys_device *dev)
	mutex_unlock(&subsys_list_lock);
}

static void for_each_subsys_device(struct subsys_device **list,
static int for_each_subsys_device(struct subsys_device **list,
		unsigned int count, void *data,
		void (*fn)(struct subsys_device *, void *))
		int (*fn)(struct subsys_device *, void *))
{
	int ret;
	while (count--) {
		struct subsys_device *dev = *list++;

		if (!dev)
			continue;
		fn(dev, data);
		ret = fn(dev, data);
		if (ret)
			return ret;
	}
	return 0;
}

static void notify_each_subsys_device(struct subsys_device **list,
@@ -593,21 +597,31 @@ static int wait_for_err_ready(struct subsys_device *subsys)
	return 0;
}

static void subsystem_shutdown(struct subsys_device *dev, void *data)
static int subsystem_shutdown(struct subsys_device *dev, void *data)
{
	const char *name = dev->desc->name;
	int ret;

	pr_info("[%s:%d]: Shutting down %s\n",
			current->comm, current->pid, name);
	if (dev->desc->shutdown(dev->desc, true) < 0)
	ret = dev->desc->shutdown(dev->desc, true);
	if (ret < 0) {
		if (!dev->desc->ignore_ssr_failure) {
			panic("subsys-restart: [%s:%d]: Failed to shutdown %s!",
				current->comm, current->pid, name);
		} else {
			pr_err("Shutdown failure on %s\n", name);
			return ret;
		}
	}
	dev->crash_count++;
	subsys_set_state(dev, SUBSYS_OFFLINE);
	disable_all_irqs(dev);

	return 0;
}

static void subsystem_ramdump(struct subsys_device *dev, void *data)
static int subsystem_ramdump(struct subsys_device *dev, void *data)
{
	const char *name = dev->desc->name;

@@ -616,15 +630,17 @@ static void subsystem_ramdump(struct subsys_device *dev, void *data)
			pr_warn("%s[%s:%d]: Ramdump failed.\n",
				name, current->comm, current->pid);
	dev->do_ramdump_on_put = false;
	return 0;
}

static void subsystem_free_memory(struct subsys_device *dev, void *data)
static int subsystem_free_memory(struct subsys_device *dev, void *data)
{
	if (dev->desc->free_memory)
		dev->desc->free_memory(dev->desc);
	return 0;
}

static void subsystem_powerup(struct subsys_device *dev, void *data)
static int subsystem_powerup(struct subsys_device *dev, void *data)
{
	const char *name = dev->desc->name;
	int ret;
@@ -632,11 +648,17 @@ static void subsystem_powerup(struct subsys_device *dev, void *data)
	pr_info("[%s:%d]: Powering up %s\n", current->comm, current->pid, name);
	init_completion(&dev->err_ready);

	if (dev->desc->powerup(dev->desc) < 0) {
	ret = dev->desc->powerup(dev->desc);
	if (ret < 0) {
		notify_each_subsys_device(&dev, 1, SUBSYS_POWERUP_FAILURE,
								NULL);
		if (!dev->desc->ignore_ssr_failure) {
			panic("[%s:%d]: Powerup error: %s!",
				current->comm, current->pid, name);
		} else {
			pr_err("Powerup failure on %s\n", name);
			return ret;
		}
	}
	enable_all_irqs(dev);

@@ -644,11 +666,16 @@ static void subsystem_powerup(struct subsys_device *dev, void *data)
	if (ret) {
		notify_each_subsys_device(&dev, 1, SUBSYS_POWERUP_FAILURE,
								NULL);
		if (!dev->desc->ignore_ssr_failure)
			panic("[%s:%d]: Timed out waiting for error ready: %s!",
				current->comm, current->pid, name);
		else
			return ret;
	}
	subsys_set_state(dev, SUBSYS_ONLINE);
	subsys_set_crash_status(dev, CRASH_STATUS_NO_CRASH);

	return 0;
}

static int __find_subsys(struct device *dev, void *data)
@@ -910,6 +937,7 @@ static void subsystem_restart_wq_func(struct work_struct *work)
	struct subsys_tracking *track;
	unsigned int count;
	unsigned long flags;
	int ret;

	/*
	 * It's OK to not take the registration lock at this point.
@@ -957,7 +985,9 @@ static void subsystem_restart_wq_func(struct work_struct *work)
	pr_debug("[%s:%d]: Starting restart sequence for %s\n",
			current->comm, current->pid, desc->name);
	notify_each_subsys_device(list, count, SUBSYS_BEFORE_SHUTDOWN, NULL);
	for_each_subsys_device(list, count, NULL, subsystem_shutdown);
	ret = for_each_subsys_device(list, count, NULL, subsystem_shutdown);
	if (ret)
		goto err;
	notify_each_subsys_device(list, count, SUBSYS_AFTER_SHUTDOWN, NULL);

	notify_each_subsys_device(list, count, SUBSYS_RAMDUMP_NOTIFICATION,
@@ -973,12 +1003,19 @@ static void subsystem_restart_wq_func(struct work_struct *work)
	for_each_subsys_device(list, count, NULL, subsystem_free_memory);

	notify_each_subsys_device(list, count, SUBSYS_BEFORE_POWERUP, NULL);
	for_each_subsys_device(list, count, NULL, subsystem_powerup);
	ret = for_each_subsys_device(list, count, NULL, subsystem_powerup);
	if (ret)
		goto err;
	notify_each_subsys_device(list, count, SUBSYS_AFTER_POWERUP, NULL);

	pr_info("[%s:%d]: Restart sequence for %s completed.\n",
			current->comm, current->pid, desc->name);

err:
	/* Reset subsys count */
	if (ret)
		dev->count = 0;

	mutex_unlock(&soc_order_reg_lock);
	mutex_unlock(&track->lock);

@@ -1470,6 +1507,9 @@ static int subsys_parse_devicetree(struct subsys_desc *desc)
			desc->generic_irq = ret;
	}

	desc->ignore_ssr_failure = of_property_read_bool(pdev->dev.of_node,
						"qcom,ignore-ssr-failure");

	order = ssr_parse_restart_orders(desc);
	if (IS_ERR(order)) {
		pr_err("Could not initialize SSR restart order, err = %ld\n",
+3 −0
Original line number Diff line number Diff line
@@ -56,6 +56,8 @@ struct module;
 * @sysmon_shutdown_ret: Return value for the call to sysmon_send_shutdown
 * @system_debug: If "set", triggers a device restart when the
 * subsystem's wdog bite handler is invoked.
 * @ignore_ssr_failure: SSR failures are usually fatal and result in a panic.
 * If set, SSR failures are not treated as fatal.
 * @edge: GLINK logical name of the subsystem
 */
struct subsys_desc {
@@ -91,6 +93,7 @@ struct subsys_desc {
	u32 sysmon_pid;
	int sysmon_shutdown_ret;
	bool system_debug;
	bool ignore_ssr_failure;
	const char *edge;
};