Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ab9b6838 authored by Shrinidhi Hegde's avatar Shrinidhi Hegde
Browse files

Throttle reboot from native watchdog

Found out that increasing the threshold to 20 crashes also pushes back the
file-system-based rollbacks. So introduce a throttling behaviour
instead. Now the native watchdog performs a reboot with ramdump at 5 restarts.
After that, PackageWatchdog/RescueParty takes over to perform other
mitigations. Ramdump + reboot will not be performed more than once
in 24 hours.

Test: manual
Bug: 291137901
Change-Id: Ia192411dad94e8e25c26f700d2fe7f94d41439b8
parent ef2e67ee
Loading
Loading
Loading
Loading
+22 −7
Original line number Diff line number Diff line
@@ -355,20 +355,35 @@ void Service::Reap(const siginfo_t& siginfo) {
    // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
    // reboot into bootloader or set crashing property
    boot_clock::time_point now = boot_clock::now();
    constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch";
    uint64_t throttle_window =
            std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count();
    if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
        !was_last_exit_ok_) {
        bool boot_completed = GetBoolProperty("sys.boot_completed", false);
        if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
            if (++crash_count_ > 4) {
                auto exit_reason = boot_completed ?
                    "in " + std::to_string(fatal_crash_window_.count()) + " minutes" :
                    "before boot completed";
                auto exit_reason =
                        boot_completed
                                ? "in " + std::to_string(fatal_crash_window_.count()) + " minutes"
                                : "before boot completed";
                if (flags_ & SVC_CRITICAL) {
                    if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
                        uint64_t epoch_time =
                                std::chrono::duration_cast<std::chrono::seconds>(
                                        std::chrono::system_clock::now().time_since_epoch())
                                        .count();
                        // Do not reboot again If it was already initiated in the last 24hrs
                        if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) >
                            throttle_window) {
                            SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time));
                            // Aborts into `fatal_reboot_target_'.
                            SetFatalRebootTarget(fatal_reboot_target_);
                            LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
                                       << exit_reason;
                        } else {
                            LOG(INFO) << "Reboot already performed in last 24hrs because of crash.";
                        }
                    }
                } else {
                    LOG(ERROR) << "process with updatable components '" << name_