Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a7107913 authored by Zhuoyao Zhang's avatar Zhuoyao Zhang
Browse files

Fix a race condition in edit monitor

When there are multiple edit monitor starting around the same time,
there will be cases that the pidfile is deleted or overridden as
multiple processes are trying to remove and create the pidfile around
the same time (see details in b/382135550).
This cl fix it by checking the pid in the pidfile and
only remove the pidfile if the pid stored there is the expected one.

Bug: 382135550
Test: atest edit_monitor_integration_test
Change-Id: Ifdb4c29c8b36052fea83ce59de1be9d99db3a852
parent 3b2cf40e
Loading
Loading
Loading
Loading
+30 −12
Original line number Diff line number Diff line
@@ -149,15 +149,15 @@ class DaemonManager:
            edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_MEMORY_USAGE
        )
        logging.error(
            "Daemon process is consuming too much memory, rebooting...")
            "Daemon process is consuming too much memory, rebooting..."
        )
        self.reboot()

      if self.max_cpu_usage >= cpu_threshold:
        self._send_error_event_to_clearcut(
            edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_CPU_USAGE
        )
        logging.error(
            "Daemon process is consuming too much cpu, killing...")
        logging.error("Daemon process is consuming too much cpu, killing...")
        self._terminate_process(self.daemon_process.pid)

    logging.info(
@@ -179,7 +179,7 @@ class DaemonManager:
        self._wait_for_process_terminate(self.daemon_process.pid, 1)
        if self.daemon_process.is_alive():
          self._terminate_process(self.daemon_process.pid)
      self._remove_pidfile()
      self._remove_pidfile(self.pid)
      logging.info("Successfully stopped daemon manager.")
    except Exception as e:
      logging.exception("Failed to stop daemon manager with error %s", e)
@@ -253,11 +253,15 @@ class DaemonManager:
    if ex_pid:
      logging.info("Found another instance with pid %d.", ex_pid)
      self._terminate_process(ex_pid)
      self._remove_pidfile()
      self._remove_pidfile(ex_pid)

  def _read_pid_from_pidfile(self):
  def _read_pid_from_pidfile(self) -> int | None:
    try:
      with open(self.pid_file_path, "r") as f:
        return int(f.read().strip())
    except FileNotFoundError as e:
      logging.warning("pidfile %s does not exist.", self.pid_file_path)
      return None

  def _write_pid_to_pidfile(self):
    """Creates a pidfile and writes the current pid to the file.
@@ -333,7 +337,23 @@ class DaemonManager:
      )
      return True

  def _remove_pidfile(self):
  def _remove_pidfile(self, expected_pid: int):
    recorded_pid = self._read_pid_from_pidfile()

    if recorded_pid is None:
      logging.info("pid file %s already removed.", self.pid_file_path)
      return

    if recorded_pid != expected_pid:
      logging.warning(
          "pid file contains pid from a different process, expected pid: %d,"
          " actual pid: %d.",
          expected_pid,
          recorded_pid,
      )
      return

    logging.debug("removing pidfile written by process %s", expected_pid)
    try:
      os.remove(self.pid_file_path)
    except FileNotFoundError:
@@ -378,9 +398,7 @@ class DaemonManager:
      uptime_end = float(f.readline().split()[0])

    return (
        (total_end_time - total_start_time)
        / (uptime_end - uptime_start)
        * 100
        (total_end_time - total_start_time) / (uptime_end - uptime_start) * 100
    )

  def _get_total_cpu_time(self, pid: int) -> float:
+33 −5
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
"""Integration tests for Edit Monitor."""

import glob
from importlib import resources
import logging
import os
import pathlib
@@ -25,8 +26,6 @@ import sys
import tempfile
import time
import unittest

from importlib import resources
from unittest import mock


@@ -49,7 +48,8 @@ class EditMonitorIntegrationTest(unittest.TestCase):
    self.root_monitoring_path.mkdir()
    self.edit_monitor_binary_path = self._import_executable("edit_monitor")
    self.patch = mock.patch.dict(
        os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'})
        os.environ, {"ENABLE_ANDROID_EDIT_MONITOR": "true"}
    )
    self.patch.start()

  def tearDown(self):
@@ -83,7 +83,21 @@ class EditMonitorIntegrationTest(unittest.TestCase):

    self.assertEqual(self._get_logged_events_num(), 4)

  def _start_edit_monitor_process(self):
  def test_start_multiple_edit_monitor_only_one_started(self):
    p1 = self._start_edit_monitor_process(wait_for_observer_start=False)
    p2 = self._start_edit_monitor_process(wait_for_observer_start=False)
    p3 = self._start_edit_monitor_process(wait_for_observer_start=False)

    live_processes = self._get_live_processes([p1, p2, p3])

    # Cleanup all live processes.
    for p in live_processes:
      os.kill(p.pid, signal.SIGINT)
      p.communicate()

    self.assertEqual(len(live_processes), 1)

  def _start_edit_monitor_process(self, wait_for_observer_start=True):
    command = f"""
    export TMPDIR="{self.working_dir.name}"
    {self.edit_monitor_binary_path} --path={self.root_monitoring_path} --dry_run"""
@@ -94,7 +108,9 @@ class EditMonitorIntegrationTest(unittest.TestCase):
        start_new_session=True,
        executable="/bin/bash",
    )
    if wait_for_observer_start:
      self._wait_for_observer_start(time_out=5)

    return p

  def _wait_for_observer_start(self, time_out):
@@ -125,6 +141,18 @@ class EditMonitorIntegrationTest(unittest.TestCase):

    return 0

  def _get_live_processes(self, processes):
    live_processes = []
    for p in processes:
      try:
        p.wait(timeout=5)
      except subprocess.TimeoutExpired as e:
        live_processes.append(p)
        logging.info("process: %d still alive.", p.pid)
      else:
        logging.info("process: %d stopped.", p.pid)
    return live_processes

  def _import_executable(self, executable_name: str) -> pathlib.Path:
    binary_dir = pathlib.Path(self.working_dir.name).joinpath("binary")
    binary_dir.mkdir()
+2 −1
Original line number Diff line number Diff line
@@ -72,7 +72,8 @@ def configure_logging(verbose=False):
  root_logging_dir = tempfile.mkdtemp(prefix='edit_monitor_')
  _, log_path = tempfile.mkstemp(dir=root_logging_dir, suffix='.log')

  log_fmt = '%(asctime)s %(filename)s:%(lineno)s:%(levelname)s: %(message)s'

  log_fmt = '%(asctime)s.%(msecs)03d %(filename)s:%(lineno)s:%(levelname)s: %(message)s'
  date_fmt = '%Y-%m-%d %H:%M:%S'
  log_level = logging.DEBUG if verbose else logging.INFO