DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
# Seconds to wait between two resource-usage samples of the daemon.
DEFAULT_MONITOR_INTERVAL_SECONDS = 5
# Compared against the value returned by _get_process_memory_percent().
DEFAULT_MEMORY_USAGE_THRESHOLD = 2000
# Compared against the value returned by _get_process_cpu_percent().
DEFAULT_CPU_USAGE_THRESHOLD = 10


def monitor_daemon(
    self,
    interval: int = DEFAULT_MONITOR_INTERVAL_SECONDS,
    memory_threshold: float = DEFAULT_MEMORY_USAGE_THRESHOLD,
    cpu_threshold: float = DEFAULT_CPU_USAGE_THRESHOLD,
):
    """Monitors the daemon process status.

    Periodically samples the CPU/memory usage of the daemon process for as
    long as it is alive, records the highest values seen in
    ``self.max_memory_usage`` / ``self.max_cpu_usage``, and terminates the
    process once either maximum reaches its threshold.

    Args:
      interval: Seconds to sleep between two samples.
      memory_threshold: Upper bound for the sampled memory usage.
      cpu_threshold: Upper bound for the sampled CPU usage.

    Returns:
      None. Returns immediately (after logging a warning) when no daemon
      process has been started.
    """
    daemon = self.daemon_process
    if daemon is None:
        # start() may have decided not to launch anything; there is nothing
        # to watch and dereferencing .pid would raise AttributeError.
        logging.warning("No daemon process to monitor.")
        return

    logging.info("start monitoring daemon process %d.", daemon.pid)

    while daemon.is_alive():
        try:
            memory_usage = self._get_process_memory_percent(daemon.pid)
            self.max_memory_usage = max(self.max_memory_usage, memory_usage)

            cpu_usage = self._get_process_cpu_percent(daemon.pid)
            self.max_cpu_usage = max(self.max_cpu_usage, cpu_usage)
        except Exception as e:
            # Sampling is best effort: log the error and keep monitoring.
            logging.warning("Failed to monitor daemon process with error: %s", e)

        # Check right after sampling so an over-budget daemon is not given
        # another full interval before it is killed.
        if (
            self.max_memory_usage >= memory_threshold
            or self.max_cpu_usage >= cpu_threshold
        ):
            logging.error(
                "Daemon process is consuming too much resource, killing..."
            )
            self._terminate_process(daemon.pid)

        # Sleep on the failure path as well; otherwise a persistent sampling
        # error turns this loop into a busy loop while the daemon is alive.
        if daemon.is_alive():
            time.sleep(interval)

    logging.info(
        "Daemon process %d terminated. Max memory usage: %f, Max cpu"
        " usage: %f.",
        daemon.pid,
        self.max_memory_usage,
        self.max_cpu_usage,
    )
def memory_consume_daemon_target(size_mb, hold_seconds=10):
    """Daemon target that allocates ``size_mb`` MB and holds it for a while.

    Args:
      size_mb: Amount of memory to allocate, in MB. Fractional values are
        truncated to a whole number of bytes.
      hold_seconds: How long to keep the allocation alive before returning.
        Defaults to the 10 seconds this helper always used.
    """
    try:
        size_bytes = int(size_mb * 1024 * 1024)
        # The buffer is never read; the name only keeps the allocation alive
        # for the duration of the sleep below.
        dummy_data = bytearray(size_bytes)
        time.sleep(hold_seconds)
    except MemoryError:
        print(f'Process failed to allocate {size_mb} MB of memory.')
def test_monitor_daemon_subprocess_killed_high_memory_usage(self):
    """The daemon is killed once its memory usage reaches the threshold.

    Starts a daemon that allocates 2 MB, monitors it with a memory threshold
    of 2, and checks that the recorded maximum reached the threshold and that
    no subprocess is left running.
    """
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=memory_consume_daemon_target,
        daemon_args=(2,),
    )
    dm.start()
    dm.monitor_daemon(interval=1, memory_threshold=2)

    # assertGreaterEqual reports both values on failure, unlike
    # assertTrue(a >= b) which only says "False is not true".
    self.assertGreaterEqual(dm.max_memory_usage, 2)
    self.assert_no_subprocess_running()
tools/edit_monitor/daemon_manager.py +92 −0 Original line number Diff line number Diff line Loading @@ -25,6 +25,9 @@ import time DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1 DEFAULT_MONITOR_INTERVAL_SECONDS = 5 DEFAULT_MEMORY_USAGE_THRESHOLD = 2000 DEFAULT_CPU_USAGE_THRESHOLD = 10 def default_daemon_target(): Loading @@ -48,6 +51,9 @@ class DaemonManager: self.pid = os.getpid() self.daemon_process = None self.max_memory_usage = 0 self.max_cpu_usage = 0 pid_file_dir = pathlib.Path(tempfile.gettempdir()).joinpath("edit_monitor") pid_file_dir.mkdir(parents=True, exist_ok=True) self.pid_file_path = self._get_pid_file_path(pid_file_dir) Loading @@ -61,6 +67,50 @@ class DaemonManager: except Exception as e: logging.exception("Failed to start daemon manager with error %s", e) def monitor_daemon( self, interval: int = DEFAULT_MONITOR_INTERVAL_SECONDS, memory_threshold: float = DEFAULT_MEMORY_USAGE_THRESHOLD, cpu_threshold: float = DEFAULT_CPU_USAGE_THRESHOLD, ): """Monits the daemon process status. Periodically check the CPU/Memory usage of the daemon process as long as the process is still running and kill the process if the resource usage is above given thresholds. """ logging.info("start monitoring daemon process %d.", self.daemon_process.pid) while self.daemon_process.is_alive(): try: memory_usage = self._get_process_memory_percent(self.daemon_process.pid) self.max_memory_usage = max(self.max_memory_usage, memory_usage) cpu_usage = self._get_process_cpu_percent(self.daemon_process.pid) self.max_cpu_usage = max(self.max_cpu_usage, cpu_usage) time.sleep(interval) except Exception as e: # Logging the error and continue. logging.warning("Failed to monitor daemon process with error: %s", e) if ( self.max_memory_usage >= memory_threshold or self.max_cpu_usage >= cpu_threshold ): logging.error( "Daemon process is consuming too much resource, killing..." ), self._terminate_process(self.daemon_process.pid) logging.info( "Daemon process %d terminated. 
def _split_stat_fields(self, stat_line: str) -> list:
    """Splits one /proc/[pid]/stat line into its positional fields.

    The second field (comm) is wrapped in parentheses and may itself contain
    spaces or parentheses, so a plain ``str.split()`` can shift every later
    field. Everything up to the last ")" is treated as "pid (comm".

    Args:
      stat_line: The single line read from /proc/[pid]/stat.

    Returns:
      A list in which index ``i`` holds field number ``i + 1``; the comm
      entry is stored without its surrounding parentheses.

    Raises:
      ValueError: If the line contains no closing parenthesis.
    """
    head, closing, tail = stat_line.rpartition(")")
    if not closing:
        raise ValueError(f"Malformed stat line: {stat_line!r}")
    pid_text, _, comm = head.partition("(")
    return [pid_text.strip(), comm, *tail.split()]


def _get_process_memory_percent(self, pid: int) -> float:
    """Returns the resident set size of process ``pid`` in MB.

    Despite the method name, the value is an absolute size in MB rather than
    a percentage.

    Raises:
      OSError, IndexError, ValueError: If the stat file cannot be read or
        parsed. The error is logged before being re-raised.
    """
    try:
        with open(f"/proc/{pid}/stat", "r") as f:
            fields = self._split_stat_fields(f.readline())
        # RSS is the 24th field in /proc/[pid]/stat, counted in pages.
        rss_pages = int(fields[23])
        # Ask the system for the page size instead of assuming 4 KiB.
        page_size_bytes = os.sysconf("SC_PAGE_SIZE")
        return rss_pages * page_size_bytes / (1024 * 1024)  # Convert to MB
    except (OSError, IndexError, ValueError):
        logging.exception("Failed to get memory usage.")
        raise


def _get_process_cpu_percent(self, pid: int, interval: int = 1) -> float:
    """Returns the CPU usage of process ``pid`` over ``interval`` seconds.

    Samples the process CPU time and the system uptime, sleeps for
    ``interval`` seconds, samples again, and returns CPU seconds consumed
    per elapsed second of uptime, times 100.

    Returns:
      The usage figure, or 0.0 when no uptime elapsed between the two
      samples (for example with ``interval=0``).

    Raises:
      OSError, IndexError, ValueError: If a /proc file cannot be read or
        parsed. The error is logged before being re-raised.
    """

    def read_uptime() -> float:
        with open("/proc/uptime", "r") as f:
            return float(f.readline().split()[0])

    try:
        total_start_time = self._get_total_cpu_time(pid)
        uptime_start = read_uptime()

        time.sleep(interval)

        total_end_time = self._get_total_cpu_time(pid)
        uptime_end = read_uptime()

        elapsed = uptime_end - uptime_start
        if elapsed <= 0:
            # Both uptime reads landed on the same tick; avoid dividing by
            # zero and report no measurable usage.
            return 0.0
        return (total_end_time - total_start_time) / elapsed * 100
    except (OSError, IndexError, ValueError):
        logging.exception("Failed to get CPU usage.")
        raise


def _get_total_cpu_time(self, pid: int) -> float:
    """Returns user + system CPU time consumed by process ``pid`` in seconds."""
    with open(f"/proc/{pid}/stat", "r") as f:
        fields = self._split_stat_fields(f.readline())
    # utime is the 14th field in /proc/[pid]/stat measured in clock ticks.
    utime = int(fields[13])
    # stime is the 15th field in /proc/[pid]/stat measured in clock ticks.
    stime = int(fields[14])
    return (utime + stime) / os.sysconf(os.sysconf_names["SC_CLK_TCK"])
def cpu_consume_daemon_target(
    target_usage_percent, period_seconds=1.0, cycles=None
):
    """Daemon target that burns roughly ``target_usage_percent`` of one CPU.

    Each period is split into a busy-wait phase and a sleep phase whose
    lengths are proportional to the requested usage.

    Args:
      target_usage_percent: Desired CPU usage; values outside 0-100 are
        clamped so the sleep duration can never become negative.
      period_seconds: Length of one busy + idle cycle. Defaults to the
        1 second this helper always used.
      cycles: Number of cycles to run, or None (default) to run forever.
    """
    busy_fraction = min(max(target_usage_percent / 100, 0.0), 1.0)
    busy_seconds = busy_fraction * period_seconds
    idle_seconds = max(period_seconds - busy_seconds, 0.0)

    completed = 0
    while cycles is None or completed < cycles:
        # The monotonic clock keeps the duty cycle unaffected by wall-clock
        # adjustments.
        deadline = time.monotonic() + busy_seconds
        while time.monotonic() < deadline:
            pass  # Busy loop to consume CPU

        # Sleep to reduce CPU usage
        time.sleep(idle_seconds)
        completed += 1
def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
    """The daemon is killed once its CPU usage reaches the threshold.

    Starts a daemon targeting 20% CPU usage, monitors it with a CPU
    threshold of 20, and checks that the recorded maximum reached the
    threshold and that no subprocess is left running.
    """
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=cpu_consume_daemon_target,
        daemon_args=(20,),
    )
    dm.start()
    dm.monitor_daemon(interval=1, cpu_threshold=20)

    # assertGreaterEqual reports both values on failure, unlike
    # assertTrue(a >= b) which only says "False is not true".
    self.assertGreaterEqual(dm.max_cpu_usage, 20)
    self.assert_no_subprocess_running()


@mock.patch('subprocess.check_output')
def test_monitor_daemon_failed_does_not_matter(self, mock_output):
    """A failure while monitoring must not break a simple daemon run."""
    # NOTE(review): the DaemonManager sampling code shown in this change
    # reads /proc files with open() and does not call
    # subprocess.check_output, so this patch may no longer inject a
    # monitoring failure. Confirm against assert_run_simple_daemon_success.
    mock_output.side_effect = OSError('Unknown OSError')
    self.assert_run_simple_daemon_success()