Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d28da5cf authored by Zhuoyao Zhang's avatar Zhuoyao Zhang
Browse files

Support cleaning up all existing edit monitor instances

Add a cleanup method in daemon manager to stop all the existing edit
monitor instances and place a block sign to prevent any edit monitor from
starting. This method is only used in emergency cases, when something goes
wrong with edit monitor and immediate cleanup is needed to prevent damage to
the system.

Test: atest daemon_manager_test
bug: 365617369
Change-Id: I8bff4f82a8ce272ccafb4ff8b076f05f56609426
parent ae332aea
Loading
Loading
Loading
Loading
+53 −6
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ DEFAULT_MONITOR_INTERVAL_SECONDS = 5
# NOTE(review): the units of the two usage thresholds are not visible in this
# chunk (presumably MB and percent) -- confirm against monitor_daemon.
DEFAULT_MEMORY_USAGE_THRESHOLD = 2000
DEFAULT_CPU_USAGE_THRESHOLD = 200
# 24 hours, expressed in seconds.
DEFAULT_REBOOT_TIMEOUT_SECONDS = 60 * 60 * 24
# File name of the block sign. DaemonManager resolves it inside the system
# temp directory and start() returns early while that file exists.
BLOCK_SIGN_FILE = "edit_monitor_block_sign"


def default_daemon_target():
@@ -59,15 +60,19 @@ class DaemonManager:
    pid_file_dir = pathlib.Path(tempfile.gettempdir()).joinpath("edit_monitor")
    pid_file_dir.mkdir(parents=True, exist_ok=True)
    self.pid_file_path = self._get_pid_file_path(pid_file_dir)
    self.block_sign = pathlib.Path(tempfile.gettempdir()).joinpath(
        BLOCK_SIGN_FILE
    )

  def start(self):
    """Writes the pidfile and starts the daemon process.

    Returns immediately, without touching any existing instance, when the
    block sign file exists. Otherwise stops any existing instance, writes the
    pidfile and starts the daemon process, in that order.

    Raises:
      Exception: whatever the stop/write/start steps raise is propagated to
        the caller unchanged; nothing is swallowed here.
    """
    # The block sign is an emergency stop: honour it before doing anything.
    if self.block_sign.exists():
      logging.warning("Block sign found, exiting...")
      return

    self._stop_any_existing_instance()
    self._write_pid_to_pidfile()
    self._start_daemon_process()

  def monitor_daemon(
      self,
@@ -82,6 +87,9 @@ class DaemonManager:
    process is still running and kill the process if the resource usage is above
    given thresholds.
    """
    if not self.daemon_process:
      return

    logging.info("start monitoring daemon process %d.", self.daemon_process.pid)
    reboot_time = time.time() + reboot_timeout
    while self.daemon_process.is_alive():
@@ -150,6 +158,33 @@ class DaemonManager:
      logging.exception("Failed to reboot process with error: %s.", e)
      sys.exit(1)  # Indicate an error occurred

  def cleanup(self):
    """Wipes out all edit monitor instances in the system.

    Places a block sign so that no new edit monitor process starts, then
    terminates every existing edit monitor instance that can be found. This is
    meant only for emergencies, when something has gone wrong with the edit
    monitor and immediate cleanup is needed to prevent damage to the system.

    Failures are logged and never raised: a block sign that cannot be placed
    does not stop the kill pass, and one pid that cannot be terminated does
    not stop the remaining ones.
    """
    logging.debug("Start cleaning up all existing instances.")

    # The block sign goes down first so nothing new starts during the sweep.
    try:
      self.block_sign.touch()
    except OSError:
      logging.exception("Failed to place the block sign")

    # Then terminate every instance that left a pid behind.
    for instance_pid in self._find_all_instances_pids():
      logging.info(
          "Found existing edit monitor instance with pid %d, killing...",
          instance_pid,
      )
      try:
        self._terminate_process(instance_pid)
      except Exception:
        logging.exception("Failed to terminate process %d", instance_pid)

  def _stop_any_existing_instance(self):
    if not self.pid_file_path.exists():
      logging.debug("No existing instances.")
@@ -300,3 +335,15 @@ class DaemonManager:
      stime = int(stats[14])
      return (utime + stime) / os.sysconf(os.sysconf_names["SC_CLK_TCK"])

  def _find_all_instances_pids(self) -> list[int]:
    """Collects the pids recorded in every ".lock" file next to the pidfile.

    Scans the directory that holds this manager's pidfile and parses the
    content of each file whose name ends with ".lock" as an integer pid.
    Files that cannot be read or do not contain an integer are logged and
    skipped.

    Returns:
      The parsed pids, in directory listing order.
    """
    pid_dir = self.pid_file_path.parent
    found_pids = []

    for entry in os.listdir(pid_dir):
      if not entry.endswith(".lock"):
        continue
      try:
        with open(pid_dir.joinpath(entry), "r") as lock_file:
          found_pids.append(int(lock_file.read().strip()))
      except (OSError, ValueError, TypeError):
        logging.exception("Failed to get pid from file path: %s", entry)

    return found_pids
 No newline at end of file
+43 −31
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ import unittest
from unittest import mock
from edit_monitor import daemon_manager


TEST_BINARY_FILE = '/path/to/test_binary'
TEST_PID_FILE_PATH = (
    '587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e.lock'
@@ -92,20 +93,10 @@ class DaemonManagerTest(unittest.TestCase):
    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_running(self):
    """Checks the daemon starts while an earlier instance is still running."""
    # NOTE(review): this body sets up an "existing instance" twice -- once
    # inline (pidfile named TEST_PID_FILE_PATH) and once through
    # _create_fake_deamon_process(). It looks like the pre- and post-refactor
    # versions of the setup were both kept; confirm which one is intended.
    # Create a long running subprocess
    p = multiprocessing.Process(target=long_running_daemon)
    p.start()

    # Create a pidfile with the subprocess pid
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write(str(p.pid))
    # Create a running daemon subprocess
    # `p` is rebound here, so the terminate() below only targets the process
    # returned by the helper, not the one started inline above.
    p = self._create_fake_deamon_process()

    self.assert_run_simple_daemon_success()
    p.terminate()

  def test_start_success_with_existing_instance_already_dead(self):
    # Create a pidfile with pid that does not exist.
@@ -129,6 +120,17 @@ class DaemonManagerTest(unittest.TestCase):
    self.assert_run_simple_daemon_success()
    existing_dm.stop()

  def test_start_return_directly_if_block_sign_exists(self):
    """start() must not launch a daemon while the block sign file exists."""
    # Drop the block sign into the test working directory (presumably the
    # directory the manager treats as the temp dir -- set up outside this
    # method).
    working_dir = pathlib.Path(self.working_dir.name)
    block_sign = working_dir.joinpath(daemon_manager.BLOCK_SIGN_FILE)
    block_sign.touch()

    manager = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    manager.start()

    # With the sign in place no daemon process may have been created.
    self.assertIsNone(manager.daemon_process)

  @mock.patch('os.kill')
  def test_start_failed_to_kill_existing_instance(self, mock_kill):
    mock_kill.side_effect = OSError('Unknown OSError')
@@ -139,12 +141,10 @@ class DaemonManagerTest(unittest.TestCase):
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    with self.assertRaises(OSError) as error:
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
      dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_failed_to_write_pidfile(self):
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
@@ -153,21 +153,17 @@ class DaemonManagerTest(unittest.TestCase):
    # Makes the directory read-only so write pidfile will fail.
    os.chmod(pid_file_path_dir, 0o555)

    with self.assertRaises(PermissionError) as error:
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
      dm.start()

    # Verifies no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_failed_to_start_daemon_process(self):
    """Building and starting a manager with a bad target raises TypeError."""
    with self.assertRaises(TypeError):
      manager = daemon_manager.DaemonManager(
          TEST_BINARY_FILE,
          daemon_target='wrong_target',
          daemon_args=(1),
      )
      manager.start()

    # After the failure nothing may be recorded as the running daemon.
    self.assertIsNone(manager.daemon_process)

  def test_monitor_daemon_subprocess_killed_high_memory_usage(self):
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
@@ -321,7 +317,7 @@ class DaemonManagerTest(unittest.TestCase):
          self._is_process_alive(child_pid), f'process {child_pid} still alive'
      )

  def _get_child_processes(self, parent_pid):
  def _get_child_processes(self, parent_pid: int) -> list[int]:
    try:
      output = subprocess.check_output(
          ['ps', '-o', 'pid,ppid', '--no-headers'], text=True
@@ -336,7 +332,7 @@ class DaemonManagerTest(unittest.TestCase):
    except subprocess.CalledProcessError as e:
      self.fail(f'failed to get child process, error: {e}')

  def _is_process_alive(self, pid):
  def _is_process_alive(self, pid: int) -> bool:
    try:
      output = subprocess.check_output(
          ['ps', '-p', str(pid), '-o', 'state='], text=True
@@ -355,6 +351,22 @@ class DaemonManagerTest(unittest.TestCase):
        # process already terminated
        pass

  def _create_fake_deamon_process(
      self, name: str = ''
  ) -> multiprocessing.Process:
    """Starts a long running subprocess and records its pid in a lock file.

    The pid is written to "<name>pid.lock" inside the "edit_monitor"
    directory under the test working directory; the directory is created if
    it is missing.

    Args:
      name: prefix for the lock file name.

    Returns:
      The started process. This helper does not terminate it.
    """
    fake_daemon = multiprocessing.Process(target=long_running_daemon)
    fake_daemon.start()

    lock_dir = pathlib.Path(self.working_dir.name) / 'edit_monitor'
    lock_dir.mkdir(parents=True, exist_ok=True)

    lock_file_path = lock_dir / (name + 'pid.lock')
    with open(lock_file_path, 'w') as lock_file:
      lock_file.write(str(fake_daemon.pid))

    return fake_daemon


if __name__ == '__main__':
  unittest.main()