Loading drivers/soc/qcom/Kconfig +37 −0 Original line number Diff line number Diff line Loading @@ -375,6 +375,43 @@ config MSM_PIL_SSR_GENERIC or a fatal error. Subsystems include LPASS, Venus, VPU, WCNSS and BCSS. config SETUP_SSR_NOTIF_TIMEOUTS bool "Set timeouts on SSR sysmon notifications and notifier callbacks" help Setup timers prior to initiating communication between subsystems through sysmon, and prior to sending notifications to drivers in the kernel that have registered callbacks with the subsystem notifier framework for a particular subsystem. This is a debugging feature. config SSR_SYSMON_NOTIF_TIMEOUT depends on SETUP_SSR_NOTIF_TIMEOUTS int "SSR Sysmon notifications timeout in ms" default 10000 help The amount of time, in milliseconds, that should elapse between the start and end of sysmon SSR notifications, before a warning is emitted. config SSR_SUBSYS_NOTIF_TIMEOUT depends on SETUP_SSR_NOTIF_TIMEOUTS int "SSR Subsystem notifier timeout in ms" default 10000 help The amount of time, in milliseconds, that should elapse between the start and end of SSR notifications through the subsystem notifier, before a warning is emitted. config PANIC_ON_SSR_NOTIF_TIMEOUT bool "Trigger kernel panic when notification timeout expires" depends on SETUP_SSR_NOTIF_TIMEOUTS help Trigger a kernel panic when communication between subsystems through sysmon is taking too long. Also trigger a kernel panic if invoking the callbacks registered with a particular subsystem's notifications by the subsystem notifier framework is taking too long. This is a debugging feature. 
config MSM_BOOT_STATS bool "Use MSM boot stats reporting" help Loading drivers/soc/qcom/subsystem_restart.c +117 −3 Original line number Diff line number Diff line Loading @@ -39,6 +39,7 @@ #include <linux/of_irq.h> #include <linux/of.h> #include <asm/current.h> #include <linux/timer.h> #include "peripheral-loader.h" Loading @@ -54,6 +55,35 @@ module_param(enable_debug, int, 0644); #define SHUTDOWN_ACK_MAX_LOOPS 100 #define SHUTDOWN_ACK_DELAY_MS 100 #ifdef CONFIG_SETUP_SSR_NOTIF_TIMEOUTS /* Timeout used for detection of notification hangs. In seconds.*/ #define SYSMON_COMM_TIMEOUT CONFIG_SSR_SYSMON_NOTIF_TIMEOUT #define SUBSYS_NOTIF_TIMEOUT CONFIG_SSR_SUBSYS_NOTIF_TIMEOUT #define setup_timeout(dest_ss, source_ss, comm_type) \ _setup_timeout(dest_ss, source_ss, comm_type) #define cancel_timeout(subsys) del_timer(&subsys->timeout_data.timer) #define init_subsys_timer(subsys) _init_subsys_timer(subsys) /* Timeout values */ static unsigned long timeout_vals[NUM_SSR_COMMS] = { [SUBSYS_TO_SUBSYS_SYSMON] = SYSMON_COMM_TIMEOUT, [SUBSYS_TO_HLOS] = SUBSYS_NOTIF_TIMEOUT, [HLOS_TO_SUBSYS_SYSMON_SHUTDOWN] = SYSMON_COMM_TIMEOUT, }; #ifdef CONFIG_PANIC_ON_SSR_NOTIF_TIMEOUT #define SSR_NOTIF_TIMEOUT_WARN(fmt...) panic(fmt) #else /* CONFIG_PANIC_ON_SSR_NOTIF_TIMEOUT */ #define SSR_NOTIF_TIMEOUT_WARN(fmt...) 
WARN(1, fmt) #endif /* CONFIG_PANIC_ON_SSR_NOTIF_TIMEOUT */ #else /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ #define setup_timeout(dest_ss, source_ss, sysmon_comm) #define cancel_timeout(subsys) #define init_subsys_timer(subsys) #endif /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ /** * enum p_subsys_state - state of a subsystem (private) * @SUBSYS_NORMAL: subsystem is operating normally Loading Loading @@ -485,15 +515,89 @@ static int is_ramdump_enabled(struct subsys_device *dev) return enable_ramdumps; } #ifdef CONFIG_SETUP_SSR_NOTIF_TIMEOUTS static void notif_timeout_handler(unsigned long data) { char *sysmon_msg = "Sysmon communication from %s to %s taking too long"; char *subsys_notif_msg = "Subsys notifier chain for %s taking too long"; char *sysmon_shutdwn_msg = "sysmon_send_shutdown to %s taking too long"; char *unknown_err_msg = "Unknown communication occurred"; struct subsys_notif_timeout *timeout_data = (struct subsys_notif_timeout *) data; enum ssr_comm comm_type = timeout_data->comm_type; switch (comm_type) { case SUBSYS_TO_SUBSYS_SYSMON: SSR_NOTIF_TIMEOUT_WARN(sysmon_msg, timeout_data->source_name, timeout_data->dest_name); break; case SUBSYS_TO_HLOS: SSR_NOTIF_TIMEOUT_WARN(subsys_notif_msg, timeout_data->source_name); break; case HLOS_TO_SUBSYS_SYSMON_SHUTDOWN: SSR_NOTIF_TIMEOUT_WARN(sysmon_shutdwn_msg, timeout_data->dest_name); break; default: SSR_NOTIF_TIMEOUT_WARN(unknown_err_msg); } } static void _setup_timeout(struct subsys_desc *source_ss, struct subsys_desc *dest_ss, enum ssr_comm comm_type) { struct subsys_notif_timeout *timeout_data; unsigned long timeout; switch (comm_type) { case SUBSYS_TO_SUBSYS_SYSMON: timeout_data = &source_ss->timeout_data; timeout_data->dest_name = dest_ss->name; timeout_data->source_name = source_ss->name; break; case SUBSYS_TO_HLOS: timeout_data = &source_ss->timeout_data; timeout_data->dest_name = NULL; timeout_data->source_name = source_ss->name; break; case HLOS_TO_SUBSYS_SYSMON_SHUTDOWN: timeout_data = 
&dest_ss->timeout_data; timeout_data->dest_name = dest_ss->name; timeout_data->source_name = NULL; break; default: return; } timeout_data->timer.data = (unsigned long) timeout_data; timeout_data->comm_type = comm_type; timeout = jiffies + msecs_to_jiffies(timeout_vals[comm_type]); mod_timer(&timeout_data->timer, timeout); } static void _init_subsys_timer(struct subsys_desc *subsys) { init_timer(&subsys->timeout_data.timer); subsys->timeout_data.timer.function = notif_timeout_handler; } #endif /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ static void send_sysmon_notif(struct subsys_device *dev) { struct subsys_device *subsys; mutex_lock(&subsys_list_lock); list_for_each_entry(subsys, &subsys_list, list) if ((subsys->notif_state > 0) && (subsys != dev)) if ((subsys->notif_state > 0) && (subsys != dev)) { setup_timeout(subsys->desc, dev->desc, SUBSYS_TO_SUBSYS_SYSMON); sysmon_send_event(dev->desc, subsys->desc, subsys->notif_state); cancel_timeout(subsys->desc); } mutex_unlock(&subsys_list_lock); } Loading Loading @@ -536,9 +640,13 @@ static void notify_each_subsys_device(struct subsys_device **list, mutex_lock(&subsys_list_lock); list_for_each_entry(subsys, &subsys_list, list) if (dev != subsys && subsys->track.state == SUBSYS_ONLINE) subsys->track.state == SUBSYS_ONLINE) { setup_timeout(dev->desc, subsys->desc, SUBSYS_TO_SUBSYS_SYSMON); sysmon_send_event(subsys->desc, dev->desc, notif); cancel_timeout(dev->desc); } mutex_unlock(&subsys_list_lock); if (notif == SUBSYS_AFTER_POWERUP && Loading @@ -552,8 +660,10 @@ static void notify_each_subsys_device(struct subsys_device **list, notif_data.pdev = pdev; trace_pil_notif("before_send_notif", notif, dev->desc->fw_name); setup_timeout(dev->desc, NULL, SUBSYS_TO_HLOS); subsys_notif_queue_notification(dev->notify, notif, ¬if_data); cancel_timeout(dev->desc); trace_pil_notif("after_send_notif", notif, dev->desc->fw_name); } } Loading Loading @@ -775,8 +885,11 @@ static void subsys_stop(struct subsys_device *subsys) if 
(!of_property_read_bool(subsys->desc->dev->of_node, "qcom,pil-force-shutdown")) { subsys_set_state(subsys, SUBSYS_OFFLINING); setup_timeout(NULL, subsys->desc, HLOS_TO_SUBSYS_SYSMON_SHUTDOWN); subsys->desc->sysmon_shutdown_ret = sysmon_send_shutdown(subsys->desc); cancel_timeout(subsys->desc); if (subsys->desc->sysmon_shutdown_ret) pr_debug("Graceful shutdown failed for %s\n", name); } Loading Loading @@ -1694,6 +1807,7 @@ struct subsys_device *subsys_register(struct subsys_desc *desc) INIT_WORK(&subsys->work, subsystem_restart_wq_func); INIT_WORK(&subsys->device_restart_work, device_restart_work_hdlr); spin_lock_init(&subsys->track.s_lock); init_subsys_timer(desc); subsys->id = ida_simple_get(&subsys_ida, 0, 0, GFP_KERNEL); if (subsys->id < 0) { Loading include/soc/qcom/subsystem_restart.h +27 −0 Original line number Diff line number Diff line Loading @@ -35,6 +35,30 @@ enum crash_status { struct device; struct module; enum ssr_comm { SUBSYS_TO_SUBSYS_SYSMON, SUBSYS_TO_HLOS, HLOS_TO_SUBSYS_SYSMON_SHUTDOWN, NUM_SSR_COMMS, }; /** * struct subsys_notif_timeout - timeout data used by notification timeout hdlr * @comm_type: Specifies if the type of communication being tracked is * through sysmon between two subsystems, subsystem notifier call chain, or * sysmon shutdown. * @dest_name: subsystem to which sysmon notification is being sent to * @source_name: subsystem which generated event that notification is being sent * for * @timer: timer for scheduling timeout */ struct subsys_notif_timeout { enum ssr_comm comm_type; const char *dest_name; const char *source_name; struct timer_list timer; }; /** * struct subsys_desc - subsystem descriptor * @name: name of subsystem Loading Loading @@ -98,6 +122,9 @@ struct subsys_desc { bool ignore_ssr_failure; const char *edge; struct qcom_smem_state *state; #ifdef CONFIG_SETUP_SSR_NOTIF_TIMEOUTS struct subsys_notif_timeout timeout_data; #endif /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ }; /** Loading Loading
drivers/soc/qcom/Kconfig +37 −0 Original line number Diff line number Diff line Loading @@ -375,6 +375,43 @@ config MSM_PIL_SSR_GENERIC or a fatal error. Subsystems include LPASS, Venus, VPU, WCNSS and BCSS. config SETUP_SSR_NOTIF_TIMEOUTS bool "Set timeouts on SSR sysmon notifications and notifier callbacks" help Setup timers prior to initiating communication between subsystems through sysmon, and prior to sending notifications to drivers in the kernel that have registered callbacks with the subsystem notifier framework for a particular subsystem. This is a debugging feature. config SSR_SYSMON_NOTIF_TIMEOUT depends on SETUP_SSR_NOTIF_TIMEOUTS int "SSR Sysmon notifications timeout in ms" default 10000 help The amount of time, in milliseconds, that should elapse between the start and end of sysmon SSR notifications, before a warning is emitted. config SSR_SUBSYS_NOTIF_TIMEOUT depends on SETUP_SSR_NOTIF_TIMEOUTS int "SSR Subsystem notifier timeout in ms" default 10000 help The amount of time, in milliseconds, that should elapse between the start and end of SSR notifications through the subsystem notifier, before a warning is emitted. config PANIC_ON_SSR_NOTIF_TIMEOUT bool "Trigger kernel panic when notification timeout expires" depends on SETUP_SSR_NOTIF_TIMEOUTS help Trigger a kernel panic when communication between subsystems through sysmon is taking too long. Also trigger a kernel panic if invoking the callbacks registered with a particular subsystem's notifications by the subsystem notifier framework is taking too long. This is a debugging feature. config MSM_BOOT_STATS bool "Use MSM boot stats reporting" help Loading
drivers/soc/qcom/subsystem_restart.c +117 −3 Original line number Diff line number Diff line Loading @@ -39,6 +39,7 @@ #include <linux/of_irq.h> #include <linux/of.h> #include <asm/current.h> #include <linux/timer.h> #include "peripheral-loader.h" Loading @@ -54,6 +55,35 @@ module_param(enable_debug, int, 0644); #define SHUTDOWN_ACK_MAX_LOOPS 100 #define SHUTDOWN_ACK_DELAY_MS 100 #ifdef CONFIG_SETUP_SSR_NOTIF_TIMEOUTS /* Timeout used for detection of notification hangs. In seconds.*/ #define SYSMON_COMM_TIMEOUT CONFIG_SSR_SYSMON_NOTIF_TIMEOUT #define SUBSYS_NOTIF_TIMEOUT CONFIG_SSR_SUBSYS_NOTIF_TIMEOUT #define setup_timeout(dest_ss, source_ss, comm_type) \ _setup_timeout(dest_ss, source_ss, comm_type) #define cancel_timeout(subsys) del_timer(&subsys->timeout_data.timer) #define init_subsys_timer(subsys) _init_subsys_timer(subsys) /* Timeout values */ static unsigned long timeout_vals[NUM_SSR_COMMS] = { [SUBSYS_TO_SUBSYS_SYSMON] = SYSMON_COMM_TIMEOUT, [SUBSYS_TO_HLOS] = SUBSYS_NOTIF_TIMEOUT, [HLOS_TO_SUBSYS_SYSMON_SHUTDOWN] = SYSMON_COMM_TIMEOUT, }; #ifdef CONFIG_PANIC_ON_SSR_NOTIF_TIMEOUT #define SSR_NOTIF_TIMEOUT_WARN(fmt...) panic(fmt) #else /* CONFIG_PANIC_ON_SSR_NOTIF_TIMEOUT */ #define SSR_NOTIF_TIMEOUT_WARN(fmt...) 
WARN(1, fmt) #endif /* CONFIG_PANIC_ON_SSR_NOTIF_TIMEOUT */ #else /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ #define setup_timeout(dest_ss, source_ss, sysmon_comm) #define cancel_timeout(subsys) #define init_subsys_timer(subsys) #endif /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ /** * enum p_subsys_state - state of a subsystem (private) * @SUBSYS_NORMAL: subsystem is operating normally Loading Loading @@ -485,15 +515,89 @@ static int is_ramdump_enabled(struct subsys_device *dev) return enable_ramdumps; } #ifdef CONFIG_SETUP_SSR_NOTIF_TIMEOUTS static void notif_timeout_handler(unsigned long data) { char *sysmon_msg = "Sysmon communication from %s to %s taking too long"; char *subsys_notif_msg = "Subsys notifier chain for %s taking too long"; char *sysmon_shutdwn_msg = "sysmon_send_shutdown to %s taking too long"; char *unknown_err_msg = "Unknown communication occurred"; struct subsys_notif_timeout *timeout_data = (struct subsys_notif_timeout *) data; enum ssr_comm comm_type = timeout_data->comm_type; switch (comm_type) { case SUBSYS_TO_SUBSYS_SYSMON: SSR_NOTIF_TIMEOUT_WARN(sysmon_msg, timeout_data->source_name, timeout_data->dest_name); break; case SUBSYS_TO_HLOS: SSR_NOTIF_TIMEOUT_WARN(subsys_notif_msg, timeout_data->source_name); break; case HLOS_TO_SUBSYS_SYSMON_SHUTDOWN: SSR_NOTIF_TIMEOUT_WARN(sysmon_shutdwn_msg, timeout_data->dest_name); break; default: SSR_NOTIF_TIMEOUT_WARN(unknown_err_msg); } } static void _setup_timeout(struct subsys_desc *source_ss, struct subsys_desc *dest_ss, enum ssr_comm comm_type) { struct subsys_notif_timeout *timeout_data; unsigned long timeout; switch (comm_type) { case SUBSYS_TO_SUBSYS_SYSMON: timeout_data = &source_ss->timeout_data; timeout_data->dest_name = dest_ss->name; timeout_data->source_name = source_ss->name; break; case SUBSYS_TO_HLOS: timeout_data = &source_ss->timeout_data; timeout_data->dest_name = NULL; timeout_data->source_name = source_ss->name; break; case HLOS_TO_SUBSYS_SYSMON_SHUTDOWN: timeout_data = 
&dest_ss->timeout_data; timeout_data->dest_name = dest_ss->name; timeout_data->source_name = NULL; break; default: return; } timeout_data->timer.data = (unsigned long) timeout_data; timeout_data->comm_type = comm_type; timeout = jiffies + msecs_to_jiffies(timeout_vals[comm_type]); mod_timer(&timeout_data->timer, timeout); } static void _init_subsys_timer(struct subsys_desc *subsys) { init_timer(&subsys->timeout_data.timer); subsys->timeout_data.timer.function = notif_timeout_handler; } #endif /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */ static void send_sysmon_notif(struct subsys_device *dev) { struct subsys_device *subsys; mutex_lock(&subsys_list_lock); list_for_each_entry(subsys, &subsys_list, list) if ((subsys->notif_state > 0) && (subsys != dev)) if ((subsys->notif_state > 0) && (subsys != dev)) { setup_timeout(subsys->desc, dev->desc, SUBSYS_TO_SUBSYS_SYSMON); sysmon_send_event(dev->desc, subsys->desc, subsys->notif_state); cancel_timeout(subsys->desc); } mutex_unlock(&subsys_list_lock); } Loading Loading @@ -536,9 +640,13 @@ static void notify_each_subsys_device(struct subsys_device **list, mutex_lock(&subsys_list_lock); list_for_each_entry(subsys, &subsys_list, list) if (dev != subsys && subsys->track.state == SUBSYS_ONLINE) subsys->track.state == SUBSYS_ONLINE) { setup_timeout(dev->desc, subsys->desc, SUBSYS_TO_SUBSYS_SYSMON); sysmon_send_event(subsys->desc, dev->desc, notif); cancel_timeout(dev->desc); } mutex_unlock(&subsys_list_lock); if (notif == SUBSYS_AFTER_POWERUP && Loading @@ -552,8 +660,10 @@ static void notify_each_subsys_device(struct subsys_device **list, notif_data.pdev = pdev; trace_pil_notif("before_send_notif", notif, dev->desc->fw_name); setup_timeout(dev->desc, NULL, SUBSYS_TO_HLOS); subsys_notif_queue_notification(dev->notify, notif, ¬if_data); cancel_timeout(dev->desc); trace_pil_notif("after_send_notif", notif, dev->desc->fw_name); } } Loading Loading @@ -775,8 +885,11 @@ static void subsys_stop(struct subsys_device *subsys) if 
(!of_property_read_bool(subsys->desc->dev->of_node, "qcom,pil-force-shutdown")) { subsys_set_state(subsys, SUBSYS_OFFLINING); setup_timeout(NULL, subsys->desc, HLOS_TO_SUBSYS_SYSMON_SHUTDOWN); subsys->desc->sysmon_shutdown_ret = sysmon_send_shutdown(subsys->desc); cancel_timeout(subsys->desc); if (subsys->desc->sysmon_shutdown_ret) pr_debug("Graceful shutdown failed for %s\n", name); } Loading Loading @@ -1694,6 +1807,7 @@ struct subsys_device *subsys_register(struct subsys_desc *desc) INIT_WORK(&subsys->work, subsystem_restart_wq_func); INIT_WORK(&subsys->device_restart_work, device_restart_work_hdlr); spin_lock_init(&subsys->track.s_lock); init_subsys_timer(desc); subsys->id = ida_simple_get(&subsys_ida, 0, 0, GFP_KERNEL); if (subsys->id < 0) { Loading
include/soc/qcom/subsystem_restart.h +27 −0 Original line number Diff line number Diff line Loading @@ -35,6 +35,30 @@ enum crash_status {
struct device;
struct module;

/*
 * Kinds of SSR communication that can be guarded by a notification timeout.
 * NUM_SSR_COMMS is the number of kinds and must stay last.
 */
enum ssr_comm {
	SUBSYS_TO_SUBSYS_SYSMON,
	SUBSYS_TO_HLOS,
	HLOS_TO_SUBSYS_SYSMON_SHUTDOWN,
	NUM_SSR_COMMS,
};

/**
 * struct subsys_notif_timeout - timeout data used by notification timeout hdlr
 * @comm_type: Specifies whether the communication being tracked is sysmon
 * traffic between two subsystems, the subsystem notifier call chain, or a
 * sysmon shutdown.
 * @dest_name: subsystem to which the sysmon notification is being sent
 * @source_name: subsystem that generated the event the notification is being
 * sent for
 * @timer: timer for scheduling timeout
 */
struct subsys_notif_timeout {
	enum ssr_comm comm_type;
	const char *dest_name;
	const char *source_name;
	struct timer_list timer;
};

/**
 * struct subsys_desc - subsystem descriptor
 * @name: name of subsystem
Loading Loading @@ -98,6 +122,9 @@ struct subsys_desc {
	bool ignore_ssr_failure;
	const char *edge;
	struct qcom_smem_state *state;
#ifdef CONFIG_SETUP_SSR_NOTIF_TIMEOUTS
	struct subsys_notif_timeout timeout_data;
#endif /* CONFIG_SETUP_SSR_NOTIF_TIMEOUTS */
};

/**
Loading