#!/opt/imunify360/venv/bin/python3
"""Entry point of the resident Imunify agent process.

Parses the command line, optionally daemonizes, attaches the databases,
starts the message-processing plugins and runs the asyncio event loop until
a termination signal triggers the shutdown task.
"""
import argparse
import asyncio
import gc
import json
import logging
import os
import signal
import socket
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager, suppress
from functools import partial
from pathlib import Path
from typing import Tuple

import daemon
import daemon.pidfile
import defence360agent.internals.logger
import sentry_sdk
from lockfile import AlreadyLocked

from defence360agent import files
from defence360agent.api import health
from defence360agent.contracts.config import (
    ConfigsValidator,
    Core,
    Merger,
    Model,
    is_mi_freemium_license,
)
from defence360agent.contracts.hook_events import HookEvent
from defence360agent.contracts.license import LicenseCLN
from defence360agent.contracts.plugins import (
    BasePlugin,
    MessageSink,
    MessageSource,
)
from defence360agent.internals.cln import subscribe_to_license_changes
from defence360agent.internals.global_scope import g
from defence360agent.internals.iaid import IndependentAgentIDAPI
from defence360agent.internals.lazy_load import CoreSource
from defence360agent.internals.the_sink import TheSink
from defence360agent.model import instance, simplification, tls_check
from defence360agent.subsys import systemd_notifier
from defence360agent.utils import (
    Scope,
    Task,
    create_task_and_log_exceptions,
    importer,
    is_root_user,
    is_systemd_boot,
)
from defence360agent.utils.check_db import is_db_corrupted
from defence360agent.utils.cli import EXITCODE_GENERAL_ERROR
from defence360agent.utils.common import DAY, rate_limit
from defence360agent.sentry import flush_sentry
from im360.application.settings import configure
from im360.contracts.config import IPSET_LISTS_PATH
from im360.internals.core import ip_versions
from im360.internals.lazy_load import IM360Source
from im360.subsys import features
from imav.internals.lazy_load import AVSource

# Increase recursion depth to allow malware scanner into deeply nested
# directories with absolute path length up to 4096 symbols
_MAX_RECURSION_DEPTH = 2100
_DB_IS_CORRUPTED_FLAG = Path("%s.is_corrupted" % Model.RESIDENT_PATH)
_DB_IS_CORRUPTED_MSG = (
    "Imunify360 database is corrupt. "
    "Application cannot run with corrupt database. "
    "Please, contact Imunify360 support team at "
    "https://cloudlinux.zendesk.com"
)
# Upper bound (seconds) for a single awaited step of the shutdown task.
# NOTE(review): 10s is the value the sink shutdown already used; it is reused
# for the web-server restart wait -- confirm it is long enough for that step.
_SHUTDOWN_STEP_TIMEOUT = 10
IM360_RESIDENT_PLUGINS_PACKAGES = (
    "defence360agent.plugins.accumulate",
    "im360.plugins.resident",
    "im360.plugins.protector",
    "im360.plugins.sensor",
)
SIMPLE_RPC_SOCKET = "/var/run/defence360agent/simple_rpc.sock"

logger = logging.getLogger("resident")
throttled_log_error = rate_limit(period=DAY)(logger.error)


class TaskFactory:
    """Event-loop task factory that keeps a strong reference to live tasks.

    Every created task is stored in ``self.pool`` and discarded from it by a
    done-callback once the task finishes.
    """

    def __init__(self):
        self.pool = set()

    # NOTE(review): newer asyncio versions may pass extra keyword arguments
    # (e.g. ``name``/``context``) to task factories -- confirm that the
    # supported interpreter versions never do so here.
    def __call__(self, loop, coro):
        task = Task(coro, loop=loop)
        self.pool.add(task)
        task.add_done_callback(self.pool.discard)
        return task


def request_install_files():
    """Send an ``update`` command to the simple RPC unix socket.

    If the socket does not exist or refuses the connection, an error is
    logged and nothing is sent. No response is read.
    """
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.settimeout(15)
        try:
            sock.connect(SIMPLE_RPC_SOCKET)
        except (FileNotFoundError, ConnectionRefusedError):
            logger.error("RPC socket isn't available yet")
            return
        msg = json.dumps({"command": ["update"], "params": {}}) + "\n"
        sock.sendall(msg.encode())


def get_plugins() -> list:
    """Return plugins in unspecified order."""
    importer.load_packages(
        CoreSource.MESSAGES + AVSource.MESSAGES + IM360Source.MESSAGES
    )
    importer.load_packages(IM360_RESIDENT_PLUGINS_PACKAGES)
    # use lexicographical order (but don't rely on it in code)
    return sorted(
        [
            plugin
            for plugin in BasePlugin.get_active_plugins()
            if plugin.SCOPE is not Scope.AV
            and (not is_mi_freemium_license() or plugin.AVAILABLE_ON_FREEMIUM)
        ],
        key=lambda item: f"{item.__module__}.{item.__name__}",
    )


@contextmanager
def log_and_suppress_error(message):
    """Log *message* on any error & suppress it."""
    try:
        yield
    except Exception as e:
        logger.error("caught error %r on %s", e, message)
        sentry_sdk.capture_exception(e)


async def update_health_sensor():
    """Mark the health sensor registered/unregistered per license validity."""
    if LicenseCLN.is_valid():
        health.sensor.registered()
    else:
        health.sensor.unregistered()


async def init_actions():
    """Run the startup actions that must succeed before plugins start."""
    # Any uncaught exceptions here prevents the agent from
    # starting. Non-critical (agent can continue) functionality should
    # either be moved to a plugin or caught&log *all* its exceptions.
    # Also nothing should block here indefinitely (any operation that
    # may block should have a timeout).
    ip_versions.init()
    subscribe_to_license_changes(features.update_repos)
    subscribe_to_license_changes(update_health_sensor)
    # Bind the path as a parameter (same form as the other ATTACH statements
    # in start()) instead of formatting it into the SQL text, so a quote in
    # the path cannot break the statement.
    instance.db.execute_sql(
        "ATTACH ? AS proactive", (str(Model.PROACTIVE_PATH),)
    )


def parse_cli():
    """Parse ``sys.argv[1:]`` and return the resulting argparse namespace."""
    parser = argparse.ArgumentParser(description="Run imunify agent")
    parser.add_argument(
        "-v",
        dest="verbose",
        action="count",
        default=0,
        # the fragments are concatenated, so each one carries its own
        # trailing separator
        help=(
            "Level of logging. Each value corresponds to: "
            "1 - console only log level, "
            "2 - previous plus add network log, "
            "3 - all previous plus add process message log, "
            "4 - all previous plus add debug log"
        ),
    )
    parser.add_argument("--daemon", action="store_true", help="run as daemon")
    parser.add_argument(
        "--pidfile",
        default="/var/run/imunify360.pid",
        help="use with --daemon",
    )
    parser.add_argument("--log-config", help="logging config filename")
    return parser.parse_args(sys.argv[1:])


def _check_able_to_start(pidfile):
    """Exit the process if the resident database is corrupted.

    The first detection is logged as an error and creates a flag file; while
    the flag file exists, later detections are logged as warnings only. When
    the database is fine, the flag file (if any) is removed.

    *pidfile* is accepted but not used by this function.
    """
    if is_db_corrupted(db_path=Model.RESIDENT_PATH):
        if not _DB_IS_CORRUPTED_FLAG.exists():
            logger.error(_DB_IS_CORRUPTED_MSG)
            _DB_IS_CORRUPTED_FLAG.touch()
        else:
            logger.warning(_DB_IS_CORRUPTED_MSG)
        sys.exit(EXITCODE_GENERAL_ERROR)
    else:
        with suppress(FileNotFoundError):
            _DB_IS_CORRUPTED_FLAG.unlink()


def _daemonize(pidfilepath):
    """Enter a daemon context locked by the PID file at *pidfilepath*.

    Exits the process with EXITCODE_GENERAL_ERROR if the PID file is already
    locked.
    """
    logger.info("Run as daemon [pidfile = %s]", pidfilepath)
    dc = daemon.DaemonContext()
    dc.pidfile = daemon.pidfile.PIDLockFile(pidfilepath)
    dc.prevent_core = False
    dc.umask = Core.FILE_UMASK
    if is_systemd_boot():
        dc.detach_process = False
    else:
        dc.detach_process = True
    # keep the logger's file descriptors open across dc.open()
    dc.files_preserve = defence360agent.internals.logger.get_fds()
    try:
        dc.open()
    except AlreadyLocked:
        logger.error("PID file already locked by another process")
        sys.exit(EXITCODE_GENERAL_ERROR)
    gc.collect()
    # quirk: somehow this is needed for root logger messages not to
    # propagate to specialized loggers, e.g. 'perf', 'network'
    defence360agent.internals.logger.reconfigure()


def _tls_check_reset(loop):
    # init thread id for simplification.run_in_executor() worker thread
    loop.run_until_complete(
        simplification.run_in_executor(loop, tls_check.reset)
    )


def validate_configs_on_start(loop):
    """Validate config layers; on failure run the hook, log and exit."""
    try:
        ConfigsValidator.validate_config_layers()
    except Exception as e:
        from defence360agent.hooks.execute import execute_hooks

        agent_misconfig = HookEvent.AgentMisconfig(error=repr(e))
        loop.run_until_complete(execute_hooks(agent_misconfig))
        logger.warning(str(e))
        sys.exit(EXITCODE_GENERAL_ERROR)


def plugin_instances(objs, pclass):
    """Return the items of *objs* that are instances of *pclass*."""
    return [p for p in objs if isinstance(p, pclass)]


def _start_plugins(loop, plugin_classes) -> Tuple[TheSink, list, list]:
    """Instantiate the plugins, create sinks then sources, start the sink.

    Returns a ``(the_sink, sinks, sources)`` tuple.
    """
    plugins = [plugin_class() for plugin_class in plugin_classes]

    # instantiate sinks
    sinks = plugin_instances(plugins, MessageSink)
    for s in sinks:
        logger.info("Creating sink %r", s)
        loop.run_until_complete(s.create_sink(loop))

    # instantiate sources
    the_sink = TheSink(sinks, loop)
    sources = plugin_instances(plugins, MessageSource)
    for s in sources:
        logger.info("Creating source %r", s)
        loop.run_until_complete(s.create_source(loop, the_sink))

    the_sink.start()
    return the_sink, sinks, sources


def _setup_signal_handlers(loop, shutdowntask):
    """Start *shutdowntask* on the first SIGINT/SIGTERM/SIGUSR1/SIGUSR2.

    Signals received after the first one are only logged.
    """
    called = False  # whether the signal handler was called already

    def _sighandler(loop, sig):
        nonlocal called
        if not called:
            called = True
            logger.info("Caught %s", sig)
            # note: store ref, to keep the task alive, just in case
            called = create_task_and_log_exceptions(loop, shutdowntask)
        else:
            logger.info(
                "Caught %s. Shutdown task is already running, please wait.",
                sig,
            )

    for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGUSR1, signal.SIGUSR2):
        loop.add_signal_handler(sig, _sighandler, loop, sig)


async def _shutdown_task(loop, the_sink, plugin_list):
    """Shut down the sink, plugins and IAID API, then stop *loop*.

    Plugins are shut down in ascending ``SHUTDOWN_PRIORITY`` order; an error
    in one plugin is logged and does not prevent the others from shutting
    down.
    """
    with log_and_suppress_error("marking the start of the shutdown process"):
        # (there is SHUTDOWN_TIMEOUT)
        health.sensor.shutting_down(time.time())
    logger.info("shutdown task starting, pid=%s", os.getpid())
    g["shutdown_started"] = True

    with log_and_suppress_error(
        "preventing new messages (if any) processing to start"
    ):
        g.sensor_server.close()

    # note: first exception is propagated; tasks are not canceled
    # NOTE(review): this step is not wrapped in log_and_suppress_error, so
    # an exception or timeout here skips everything below, including
    # loop.stop() -- confirm this is intended.
    async with asyncio.timeout(_SHUTDOWN_STEP_TIMEOUT):
        await asyncio.gather(
            g.sensor_server.wait_closed(),
            the_sink.shutdown(),
        )

    for plugin in sorted(plugin_list, key=lambda p: p.SHUTDOWN_PRIORITY):
        with log_and_suppress_error(
            "This happened while shutting down a plugin!!"
        ):
            logger.info(
                "Shutting down %s.%s...",
                plugin.__class__.__module__,
                plugin.__class__.__name__,
            )
            # make shutting down running task be a responsibility
            # of a particular plugin but not of a universal shotgun
            await plugin.shutdown()

    with log_and_suppress_error("shutting down IAID API"):
        await IndependentAgentIDAPI.shutdown()

    # Wait for graceful web-server restart (if it was started before shutdown)
    if (restart_task := g.get("web_server_restart_task")) is not None:
        with log_and_suppress_error("waiting for web server restart"):
            # asyncio.wait_for() requires a timeout argument; without it the
            # call raised TypeError (suppressed above) and the restart was
            # never actually awaited.
            await asyncio.wait_for(
                restart_task, timeout=_SHUTDOWN_STEP_TIMEOUT
            )

    with log_and_suppress_error("stopping loop"):
        loop.stop()

    flush_sentry()
    logger.info("shutdown task finished, pid=%s", os.getpid())


def start(plugin_classes: list, init_actions) -> None:
    """Common function for agent service startup.

    plugin_classes is a list of classes implementing message processing
    plugins.
    init_actions is a coroutine that will be called prior to starting RPC
    and message processing."""
    if not is_root_user():
        logger.info("Imunify agent could be started by the root user only!")
        sys.exit(EXITCODE_GENERAL_ERROR)
    args = parse_cli()
    if os.environ.get("DEBUG") == "true":
        g.DEBUG = True
    defence360agent.internals.logger.setLogLevel(args.verbose)
    if args.log_config or os.environ.get("IMUNIFY360_LOGGING_CONFIG_FILE"):
        defence360agent.internals.logger.update_logging_config_from_file(
            args.log_config or os.environ.get("IMUNIFY360_LOGGING_CONFIG_FILE")
        )
    sys.setrecursionlimit(_MAX_RECURSION_DEPTH)
    _check_able_to_start(args.pidfile)
    if args.daemon:
        _daemonize(args.pidfile)
    systemd_notifier.notify(systemd_notifier.AgentState.DAEMONIZED)
    health.sensor.starting(time.time())
    if not LicenseCLN.is_registered():
        health.sensor.unregistered()

    loop = asyncio.get_event_loop()
    _cpu = os.cpu_count()
    # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
    # default's in Python 3.8
    loop.set_default_executor(
        ThreadPoolExecutor(max_workers=min(32, _cpu + 4 if _cpu else 5))
    )
    loop.set_task_factory(TaskFactory())
    try:
        _tls_check_reset(loop)
        instance.db.init(Model.PATH)
        instance.db.execute_sql("ATTACH ? AS resident", (Model.RESIDENT_PATH,))
        instance.db.execute_sql("ATTACH ? AS ipsetlists", (IPSET_LISTS_PATH,))
        validate_configs_on_start(loop)
        Merger.update_merged_config()
        loop.run_until_complete(init_actions())

        # If this is first agent run - we expect that *only* non-resident
        # process will download all of the static files
        # If it isn't first agent run - essential files already downloaded
        # and will be updated asynchronously
        if not loop.run_until_complete(files.essential_files_exist()):
            logger.info(
                "Essential files are missing. Try to update them and restart."
            )
            request_install_files()
            # wait a while to avoid restarting the service too often
            loop.run_until_complete(asyncio.sleep(5))
            sys.exit(EXITCODE_GENERAL_ERROR)

        the_sink, sinks, sources = _start_plugins(loop, plugin_classes)
        logger.info("Message Bus started")

        agent_started = HookEvent.AgentStarted(
            version=Core.VERSION, resident=True
        )
        create_task_and_log_exceptions(
            loop, the_sink.process_message, agent_started
        )

        # note: plugins are started before the shutdown task has been setup
        # therefore plugin.shutdown() won't be called before create_source()
        _setup_signal_handlers(
            loop, partial(_shutdown_task, loop, the_sink, sinks + sources)
        )
        loop.run_forever()
        logger.info("loop stopped")
    finally:
        # closing the loop after loop.stop() cuts off pending tasks stacktraces
        loop.close()


def run():
    """Configure the resident application, load plugins and start the agent."""
    configure(resident=True)
    plugins = get_plugins()
    start(plugins, init_actions)


if __name__ == "__main__":
    run()
    logger.info("agent stopped")