Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e0ef0005 authored by Zhuoyao Zhang's avatar Zhuoyao Zhang Committed by Gerrit Code Review
Browse files

Merge "Report edit monitor error to clearcut" into main

parents f64e2f84 ba64f31b
Loading
Loading
Loading
Loading
+53 −5
Original line number Diff line number Diff line
@@ -13,17 +13,22 @@
# limitations under the License.


import getpass
import hashlib
import logging
import multiprocessing
import os
import pathlib
import platform
import signal
import subprocess
import sys
import tempfile
import time

from atest.metrics import clearcut_client
from atest.proto import clientanalytics_pb2
from proto import edit_event_pb2

DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
DEFAULT_MONITOR_INTERVAL_SECONDS = 5
@@ -31,6 +36,9 @@ DEFAULT_MEMORY_USAGE_THRESHOLD = 2000
DEFAULT_CPU_USAGE_THRESHOLD = 200
DEFAULT_REBOOT_TIMEOUT_SECONDS = 60 * 60 * 24
BLOCK_SIGN_FILE = "edit_monitor_block_sign"
# Enum of the Clearcut log source defined under
# /google3/wireless/android/play/playlog/proto/log_source_enum.proto
LOG_SOURCE = 2524


def default_daemon_target():
@@ -46,11 +54,16 @@ class DaemonManager:
      binary_path: str,
      daemon_target: callable = default_daemon_target,
      daemon_args: tuple = (),
      cclient: clearcut_client.Clearcut | None = None,
  ):
    self.binary_path = binary_path
    self.daemon_target = daemon_target
    self.daemon_args = daemon_args
    self.cclient = cclient or clearcut_client.Clearcut(LOG_SOURCE)

    self.user_name = getpass.getuser()
    self.host_name = platform.node()
    self.source_root = os.environ.get("ANDROID_BUILD_TOP", "")
    self.pid = os.getpid()
    self.daemon_process = None

@@ -70,13 +83,20 @@ class DaemonManager:
      logging.warning("Block sign found, exiting...")
      return

    if self.binary_path.startswith('/google/cog/'):
    if self.binary_path.startswith("/google/cog/"):
      logging.warning("Edit monitor for cog is not supported, exiting...")
      return

    try:
      self._stop_any_existing_instance()
      self._write_pid_to_pidfile()
      self._start_daemon_process()
    except Exception as e:
      logging.exception("Failed to start daemon manager with error %s", e)
      self._send_error_event_to_clearcut(
          edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
      )
      raise e

  def monitor_daemon(
      self,
@@ -118,6 +138,9 @@ class DaemonManager:
        logging.error(
            "Daemon process is consuming too much resource, killing..."
        ),
        self._send_error_event_to_clearcut(
            edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE
        )
        self._terminate_process(self.daemon_process.pid)

    logging.info(
@@ -143,6 +166,12 @@ class DaemonManager:
      logging.debug("Successfully stopped daemon manager.")
    except Exception as e:
      logging.exception("Failed to stop daemon manager with error %s", e)
      self._send_error_event_to_clearcut(
          edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
      )
      sys.exit(1)
    finally:
      self.cclient.flush_events()

  def reboot(self):
    """Reboots the current process.
@@ -164,6 +193,9 @@ class DaemonManager:
      os.execv(self.binary_path, sys.argv)
    except OSError as e:
      logging.exception("Failed to reboot process with error: %s.", e)
      self._send_error_event_to_clearcut(
          edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
      )
      sys.exit(1)  # Indicate an error occurred

  def cleanup(self):
@@ -175,6 +207,7 @@ class DaemonManager:
    that requires immediate cleanup to prevent damanger to the system.
    """
    logging.debug("Start cleaning up all existing instances.")
    self._send_error_event_to_clearcut(edit_event_pb2.EditEvent.FORCE_CLEANUP)

    try:
      # First places a block sign to prevent any edit monitor process to start.
@@ -356,3 +389,18 @@ class DaemonManager:
          logging.exception("Failed to get pid from file path: %s", file)

    return pids

  def _send_error_event_to_clearcut(self, error_type):
    edit_monitor_error_event_proto = edit_event_pb2.EditEvent(
        user_name=self.user_name,
        host_name=self.host_name,
        source_root=self.source_root,
    )
    edit_monitor_error_event_proto.edit_monitor_error_event.CopyFrom(
        edit_event_pb2.EditEvent.EditMonitorErrorEvent(error_type=error_type)
    )
    log_event = clientanalytics_pb2.LogEvent(
        event_time_ms=int(time.time() * 1000),
        source_extension=edit_monitor_error_event_proto.SerializeToString(),
    )
    self.cclient.log(log_event)
+99 −21
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ import time
import unittest
from unittest import mock
from edit_monitor import daemon_manager
from proto import edit_event_pb2


TEST_BINARY_FILE = '/path/to/test_binary'
@@ -133,7 +134,8 @@ class DaemonManagerTest(unittest.TestCase):

  def test_start_return_directly_if_in_cog_env(self):
    dm = daemon_manager.DaemonManager(
        '/google/cog/cloud/user/workspace/edit_monitor')
        '/google/cog/cloud/user/workspace/edit_monitor'
    )
    dm.start()
    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)
@@ -148,9 +150,13 @@ class DaemonManagerTest(unittest.TestCase):
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    with self.assertRaises(OSError) as error:
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(OSError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_write_pidfile(self):
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
@@ -160,40 +166,63 @@ class DaemonManagerTest(unittest.TestCase):
    # Makes the directory read-only so write pidfile will fail.
    os.chmod(pid_file_path_dir, 0o555)

    with self.assertRaises(PermissionError) as error:
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(PermissionError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_start_daemon_process(self):
    with self.assertRaises(TypeError) as error:
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(TypeError):
      dm = daemon_manager.DaemonManager(
          TEST_BINARY_FILE, daemon_target='wrong_target', daemon_args=(1)
          TEST_BINARY_FILE,
          daemon_target='wrong_target',
          daemon_args=(1),
          cclient=fake_cclient,
      )
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_monitor_daemon_subprocess_killed_high_memory_usage(self):
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=memory_consume_daemon_target,
        daemon_args=(2,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, memory_threshold=2)

    self.assertTrue(dm.max_memory_usage >= 2)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
    )

  def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=cpu_consume_daemon_target,
        daemon_args=(20,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, cpu_threshold=20)

    self.assertTrue(dm.max_cpu_usage >= 20)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
    )

  @mock.patch('subprocess.check_output')
  def test_monitor_daemon_failed_does_not_matter(self, mock_output):
@@ -207,7 +236,8 @@ class DaemonManagerTest(unittest.TestCase):
    )

    dm = daemon_manager.DaemonManager(
        binary_file.name, daemon_target=long_running_daemon
        binary_file.name,
        daemon_target=long_running_daemon,
    )
    dm.start()
    dm.monitor_daemon(reboot_timeout=0.5)
@@ -226,28 +256,43 @@ class DaemonManagerTest(unittest.TestCase):
  @mock.patch('os.kill')
  def test_stop_failed_to_kill_daemon_process(self, mock_kill):
    mock_kill.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()

      self.assertTrue(dm.daemon_process.is_alive())
      self.assertTrue(dm.pid_file_path.exists())
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.remove')
  def test_stop_failed_to_remove_pidfile(self, mock_remove):
    mock_remove.side_effect = OSError('Unknown OSError')

    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()

      self.assert_no_subprocess_running()
      self.assertTrue(dm.pid_file_path.exists())

    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_reboot_success(self, mock_execv):
    binary_file = tempfile.NamedTemporaryFile(
@@ -273,7 +318,7 @@ class DaemonManagerTest(unittest.TestCase):
    )
    dm.start()

    with self.assertRaises(SystemExit) as cm:
    with self.assertRaises(SystemExit):
      dm.reboot()
      mock_execv.assert_not_called()
      self.assertEqual(cm.exception.code, 0)
@@ -281,18 +326,24 @@ class DaemonManagerTest(unittest.TestCase):
  @mock.patch('os.execv')
  def test_reboot_failed(self, mock_execv):
    mock_execv.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    binary_file = tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    )

    dm = daemon_manager.DaemonManager(
        binary_file.name, daemon_target=long_running_daemon
        binary_file.name,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.start()

    with self.assertRaises(SystemExit) as cm:
    with self.assertRaises(SystemExit):
      dm.reboot()
      self.assertEqual(cm.exception.code, 1)
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
    )

  def assert_run_simple_daemon_success(self):
    damone_output_file = tempfile.NamedTemporaryFile(
@@ -374,6 +425,33 @@ class DaemonManagerTest(unittest.TestCase):
      f.write(str(p.pid))
    return p

  def _assert_error_event_logged(self, fake_cclient, error_type):
    error_events = fake_cclient.get_sent_events()
    self.assertEquals(len(error_events), 1)
    self.assertEquals(
        edit_event_pb2.EditEvent.FromString(
            error_events[0].source_extension
        ).edit_monitor_error_event.error_type,
        error_type,
    )


class FakeClearcutClient:

  def __init__(self):
    self.pending_log_events = []
    self.sent_log_event = []

  def log(self, log_event):
    self.pending_log_events.append(log_event)

  def flush_events(self):
    self.sent_log_event.extend(self.pending_log_events)
    self.pending_log_events.clear()

  def get_sent_events(self):
    return self.sent_log_event + self.pending_log_events


if __name__ == '__main__':
  unittest.main()