Current File : //proc/self/root/opt/imunify360/venv/lib/python3.11/site-packages/imav/server.py
"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.


This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details.


You should have received a copy of the GNU General Public License
 along with this program.  If not, see <https://www.gnu.org/licenses/>.

Copyright © 2019 Cloud Linux Software Inc.

This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import argparse
import asyncio
import gc
import logging
import os
import signal
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager, suppress
from functools import partial
from pathlib import Path
from subprocess import CalledProcessError, check_output
from typing import Tuple

import daemon
from lockfile import AlreadyLocked
import daemon.pidfile
import psutil

import defence360agent.internals.logger
from defence360agent import files
from defence360agent.api import health, inactivity
from defence360agent.contracts.config import (
    ConfigsValidator,
    Core,
    Merger,
    Model,
    SimpleRpc,
)
from defence360agent.contracts.hook_events import HookEvent
from defence360agent.contracts.license import LicenseCLN
from defence360agent.contracts.plugins import MessageSink, MessageSource
from defence360agent.internals.global_scope import g
from defence360agent.internals.iaid import IndependentAgentIDAPI
from defence360agent.internals.the_sink import TheSink
from defence360agent.model import instance, simplification, tls_check
from defence360agent.simple_rpc import (
    NonRootRpcServer,
    NonRootRpcServerAV,
    RpcServer,
    RpcServerAV,
    is_running,
)
from defence360agent.subsys import systemd_notifier
from defence360agent.utils import (
    Task,
    create_task_and_log_exceptions,
    is_root_user,
    is_systemd_boot,
)
from defence360agent.utils.check_db import is_db_corrupted
from defence360agent.utils.cli import EXITCODE_GENERAL_ERROR
from defence360agent.utils.common import DAY, rate_limit
from defence360agent.sentry import flush_sentry
from imav.malwarelib.config import (
    MalwareHitStatus,
    MalwareScanResourceType,
)
from imav.malwarelib.model import MalwareHit
import sentry_sdk

# Increase recursion depth to allow malware scanner into deeply nested
# directories with absolute path length up to 4096 symbols.
# The value is applied with sys.setrecursionlimit() in start().
_MAX_RECURSION_DEPTH = 2100
# Flag file named after the database path with an ".is_corrupted" suffix.
# _check_able_to_start() creates it the first time corruption is reported
# and removes it once the database check passes again.
_DB_IS_CORRUPTED_FLAG = Path("%s.is_corrupted" % Model.PATH)
# Message logged by _check_able_to_start() when the database is corrupted.
_DB_IS_CORRUPTED_MSG = (
    "Imunify360 database is corrupt. "
    "Application cannot run with corrupt database. "
    "Please, contact Imunify360 support team at "
    "https://cloudlinux.zendesk.com"
)

logger = logging.getLogger(__name__)
# logger.error wrapped with rate_limit(period=DAY); used for the
# "already running" report in _check_able_to_start()
throttled_log_error = rate_limit(period=DAY)(logger.error)


class TaskFactory:
    """Event-loop task factory that tracks every task it creates.

    Each created task is kept in :attr:`pool` (a strong reference) until it
    is done; a done-callback removes it from the pool.
    """

    def __init__(self):
        # tasks created by this factory that have not finished yet
        self.pool = set()

    def __call__(self, loop, coro, **kwargs):
        """Create a ``Task`` for *coro* on *loop* and remember it.

        Extra keyword arguments are forwarded to ``Task`` unchanged. The
        event loop passes ``context=...`` to the factory when
        ``loop.create_task()`` is called with an explicit context, so the
        factory has to accept it instead of failing with ``TypeError``.
        """
        task = Task(coro, loop=loop, **kwargs)
        self.pool.add(task)
        task.add_done_callback(self.pool.discard)
        return task


@contextmanager
def log_and_suppress_error(message):
    """Run the managed block and swallow any ``Exception`` it raises.

    A swallowed exception is logged together with *message* (a short
    description of what was being attempted) and reported to Sentry.
    Exceptions that do not derive from ``Exception`` still propagate.
    """
    try:
        yield
    except Exception as exc:
        logger.error("caught error %r on %s", exc, message)
        sentry_sdk.capture_exception(exc)


async def _shutdown_task(loop, the_sink, plugin_list, *, restart_timeout=10):
    """Shut the agent down step by step and stop *loop*.

    Every step is wrapped in log_and_suppress_error(), so a failure in one
    step is logged and the remaining steps still run:

    1. tell the health sensor that shutdown has started;
    2. close ``g.sensor_server`` (when present) and shut *the_sink* down,
       both within a single 10 second budget;
    3. await ``shutdown()`` of every plugin, ordered by ascending
       ``SHUTDOWN_PRIORITY``;
    4. shut the IAID API down;
    5. wait for a web-server restart task stored in ``g`` (if any);
    6. stop the loop and flush Sentry.

    plugin_list is an iterable of plugin instances. restart_timeout is the
    number of seconds to wait for the web-server restart task; when it
    expires, asyncio.wait_for() cancels the task and the resulting timeout
    error is logged and suppressed.
    """
    with log_and_suppress_error("marking the start of the shutdown process"):
        # (there is SHUTDOWN_TIMEOUT)
        health.sensor.shutting_down(time.time())

    logger.info("shutdown task starting, pid=%s", os.getpid())
    with log_and_suppress_error(
        "preventing new messages (if any) processing to start"
    ):
        _tasks = []
        async with asyncio.timeout(10):
            if "sensor_server" in g:
                g.sensor_server.close()
                _tasks.append(g.sensor_server.wait_closed())
                # note: first exception is propagated; tasks are not canceled
            _tasks.append(the_sink.shutdown())
            await asyncio.gather(*_tasks)

    for plugin in sorted(plugin_list, key=lambda p: p.SHUTDOWN_PRIORITY):
        with log_and_suppress_error(
            "This happened while shutting down a plugin!!"
        ):
            logger.info(
                "Shutting down %s.%s...",
                plugin.__class__.__module__,
                plugin.__class__.__name__,
            )
            # make shutting down running task be a responsibility
            # of a particular plugin but not of a universal shotgun
            await plugin.shutdown()

    with log_and_suppress_error("shutting down IAID API"):
        await IndependentAgentIDAPI.shutdown()

    # Wait for graceful web-server restart (if it was started before shutdown)
    if (restart_task := g.get("web_server_restart_task")) is not None:
        with log_and_suppress_error("waiting for web server restart"):
            # asyncio.wait_for() requires the timeout argument; without it
            # the call raised TypeError and the task was never awaited
            await asyncio.wait_for(restart_task, timeout=restart_timeout)

    with log_and_suppress_error("stopping loop"):
        loop.stop()

    flush_sentry()

    logger.info("shutdown task finished, pid=%s", os.getpid())


def _daemonize(pidfilepath):
    """Enter a daemon context that holds a PID lock file at *pidfilepath*.

    Exits with EXITCODE_GENERAL_ERROR when the PID file is already locked.
    """
    logger.info("Run as daemon [pidfile = %s]", pidfilepath)

    context = daemon.DaemonContext()
    context.pidfile = daemon.pidfile.PIDLockFile(pidfilepath)

    context.prevent_core = False
    context.umask = Core.FILE_UMASK
    # stay attached when is_systemd_boot() is true, detach otherwise
    context.detach_process = not is_systemd_boot()
    # keep the logger's file descriptors open across daemonization
    context.files_preserve = defence360agent.internals.logger.get_fds()

    try:
        context.open()
    except AlreadyLocked:
        logger.error("PID file already locked by another process")
        sys.exit(EXITCODE_GENERAL_ERROR)
    gc.collect()

    # quirk: reconfiguring is somehow required to keep root logger messages
    #        from propagating to specialized loggers, e.g. 'perf', 'network'
    defence360agent.internals.logger.reconfigure()


async def _initial_files_update():
    """Run the start-up files update and wait for it to finish."""
    pending_update = files.update_all_no_fail_if_files_exist()
    await pending_update


def _tls_check_reset(loop):
    """Run ``tls_check.reset`` via ``simplification.run_in_executor``.

    Blocks on *loop* until the call has completed.
    """
    # init thread id for simplification.run_in_executor() worker thread
    reset_in_worker = simplification.run_in_executor(loop, tls_check.reset)
    loop.run_until_complete(reset_in_worker)

    # The current thread is deliberately NOT marked as "main_thread":
    # the simplification.run_in_executor() worker thread plays that role now,
    # hence no tls_check.reset("main_thread") call here.


def plugin_instances(objs, pclass):
    """Return a list of the items of *objs* that are instances of *pclass*.

    The relative order of the items is preserved.
    """
    matching = []
    for candidate in objs:
        if isinstance(candidate, pclass):
            matching.append(candidate)
    return matching


def _start_plugins(loop, plugin_classes) -> Tuple[TheSink, list, list]:
    """Instantiate the plugins and wire them into a started ``TheSink``.

    All classes are instantiated first. Sinks are then created, ``TheSink``
    is built on top of them, sources are created against it and finally
    ``TheSink`` is started.

    Returns a ``(the_sink, sinks, sources)`` tuple.
    """
    instances = [cls() for cls in plugin_classes]

    # message sinks come first: TheSink is constructed from them
    sinks = [p for p in instances if isinstance(p, MessageSink)]
    for sink in sinks:
        logger.info("Creating sink %r", sink)
        loop.run_until_complete(sink.create_sink(loop))

    # message sources need the sink they will feed
    the_sink = TheSink(sinks, loop)
    sources = [p for p in instances if isinstance(p, MessageSource)]
    for source in sources:
        logger.info("Creating source %r", source)
        loop.run_until_complete(source.create_source(loop, the_sink))

    the_sink.start()

    return the_sink, sinks, sources


def _start_rpc(loop, the_sink: TheSink):
    """Create the root and non-root RPC servers on *loop*.

    The ``*AV`` server classes are used when ``SimpleRpc.SOCKET_ACTIVATION``
    is set, the plain ones otherwise.
    """
    logger.info("Starting RpcServers...")
    rpc_servers = (
        (RpcServerAV, NonRootRpcServerAV)
        if SimpleRpc.SOCKET_ACTIVATION
        else (RpcServer, NonRootRpcServer)
    )
    for server_class in rpc_servers:
        loop.run_until_complete(server_class.create(loop, the_sink))


def _get_pids_open(*files):
    """Return the unique PIDs that ``lsof`` reports for *files*.

    ``lsof`` is run with ``-t`` (terse output: one PID per line) and ``+w``.
    A non-zero exit status is not treated as an error: whatever was printed
    to stdout is still parsed. An empty list is returned when ``lsof``
    cannot be found or cannot be executed.

    Output lines that are not integers are skipped (and logged at debug
    level) so that unexpected ``lsof`` output cannot break the caller,
    which only uses the result for a diagnostic message.
    """
    try:
        out = check_output(
            ["lsof", "+wt", *files],
            # the PATH given here is only a default: a PATH inherited from
            # os.environ overrides it
            env={"PATH": "/usr/sbin:/usr/bin", **os.environ},
        )
    except CalledProcessError as e:
        # keep the partial output of a failed run
        out = bytes(e.output)
    except FileNotFoundError:
        logger.warning("There is no lsof in /usr/sbin:/usr/bin")
        return []
    except IOError:
        return []

    pids = set()
    for line in out.splitlines():
        if not line.strip():
            continue
        try:
            pids.add(int(line))
        except ValueError:
            logger.debug("Skipping unexpected lsof output line: %r", line)
    return list(pids)


def _check_able_to_start(pidfile):
    """Exit the process when the agent must not be started.

    Two checks are performed:

    * if is_running() reports another instance (and the parent pid is not
      0), a rate-limited error describing the parent process, the
      processes holding the RPC sockets and the contents of *pidfile* is
      logged, and the process exits with EXITCODE_GENERAL_ERROR;
    * if the database is corrupted, the problem is logged (as an error the
      first time, as a warning while the flag file exists) and the process
      exits with EXITCODE_GENERAL_ERROR; otherwise a stale flag file is
      removed.

    Processes that disappear while they are being inspected are skipped
    instead of aborting the report with ``psutil.NoSuchProcess``.
    """
    if is_running():
        # get parent process info
        ppid = os.getppid()
        # NOTE(review): with ppid == 0 a running instance is neither
        # reported nor fatal here - confirm that this is intentional
        if ppid != 0:
            try:
                parent = psutil.Process(ppid).name()
            except psutil.NoSuchProcess:
                # the parent exited after os.getppid() was called
                parent = "None"
            pids_used_socket = _get_pids_open(
                SimpleRpc.SOCKET_PATH, SimpleRpc.NON_ROOT_SOCKET_PATH
            )
            process_used_socket = []
            for pid in pids_used_socket:
                try:
                    _pr = psutil.Process(pid)
                    _name = _pr.name()
                    _local_parent = _pr.parent()
                    if _local_parent:
                        _parent_name = _local_parent.name()
                    else:
                        _parent_name = "None"
                except psutil.NoSuchProcess:
                    # the process (or its parent) exited in the meantime
                    continue
                process_used_socket.append(
                    (
                        pid,
                        _name,
                        "parent process = %s" % str(_parent_name),
                    )
                )
            try:
                with open(pidfile) as file:
                    written_pid = file.read()
            except (OSError, IOError):
                written_pid = None
            # the message is formatted eagerly and passed as one argument
            throttled_log_error(
                "Instance of %s is already running. "
                'Parent process "%s" with pid "%s". '
                "Sockets are in use by %s. "
                "%s file contents %s pid"
                % (
                    Core.SVC_NAME,
                    parent,
                    ppid,
                    str(process_used_socket),
                    pidfile,
                    written_pid,
                )
            )
            sys.exit(EXITCODE_GENERAL_ERROR)

    if is_db_corrupted(db_path=Model.PATH):
        if not _DB_IS_CORRUPTED_FLAG.exists():
            # first detection: report as an error and remember it
            logger.error(_DB_IS_CORRUPTED_MSG)
            _DB_IS_CORRUPTED_FLAG.touch()
        else:
            logger.warning(_DB_IS_CORRUPTED_MSG)
        sys.exit(EXITCODE_GENERAL_ERROR)
    else:
        with suppress(FileNotFoundError):
            _DB_IS_CORRUPTED_FLAG.unlink()


def start(plugin_classes: list, init_actions) -> None:
    """Common function for agent service startup.

    plugin_classes is a list of classes implementing message processing
    plugins. init_actions is a coroutine that will be called prior to starting
    RPC and message processing.

    The function parses the command line, configures logging, verifies that
    the agent is able to start, optionally daemonizes, prepares the event
    loop and the database, starts the plugins and the RPC servers and then
    runs the event loop until it is stopped. The loop is always closed on
    the way out.

    The process exits with EXITCODE_GENERAL_ERROR when it is not run by the
    root user or when stopping the pending cleanup fails; the helpers called
    from here may exit the process as well.
    """
    if not is_root_user():
        logger.info("Imunify agent could be started by the root user only!")
        sys.exit(EXITCODE_GENERAL_ERROR)

    args = parse_cli()

    defence360agent.internals.logger.setLogLevel(args.verbose)
    # the --log-config option takes precedence over the environment variable
    if args.log_config or os.environ.get("IMUNIFY360_LOGGING_CONFIG_FILE"):
        defence360agent.internals.logger.update_logging_config_from_file(
            args.log_config or os.environ.get("IMUNIFY360_LOGGING_CONFIG_FILE")
        )

    sys.setrecursionlimit(_MAX_RECURSION_DEPTH)

    # may exit the process (already running / corrupted database)
    _check_able_to_start(args.pidfile)

    if args.daemon:
        _daemonize(args.pidfile)
        systemd_notifier.notify(systemd_notifier.AgentState.DAEMONIZED)

    health.sensor.starting(time.time())
    if not LicenseCLN.is_registered():
        health.sensor.unregistered()

    loop = asyncio.get_event_loop()
    _cpu = os.cpu_count()
    # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
    # default's in Python 3.8
    # i.e. min(32, cpu_count + 4), with 5 workers when cpu_count is unknown
    loop.set_default_executor(
        ThreadPoolExecutor(max_workers=min(32, _cpu + 4 if _cpu else 5))
    )
    loop.set_task_factory(TaskFactory())
    try:
        _tls_check_reset(loop)
        instance.db.init(Model.PATH)

        # exits the process when the configuration layers are invalid
        validate_configs_on_start(loop)
        Merger.update_merged_config()

        loop.run_until_complete(init_actions())
        try:
            _stop_pending_cleanup()
        except simplification.PeeweeException as e:
            # we intentionally capture all exceptions here and log them
            # it may happened on package update or other reasons, we don't
            # want to start agent in such case
            logger.error("Failed to stop pending cleanup. Reason: %s", repr(e))
            sys.exit(EXITCODE_GENERAL_ERROR)

        # If this is first agent run - we SHOULD download
        # all of the static files
        # If it isn't first agent run - essential files already downloaded
        # and will be updated asynchronously
        if not loop.run_until_complete(files.essential_files_exist()):
            logger.info(
                "Essential files are missing. Performing initial files update."
            )
            loop.run_until_complete(_initial_files_update())
        inactivity.track.set_timeout(SimpleRpc.INACTIVITY_TIMEOUT)

        the_sink, sinks, sources = _start_plugins(loop, plugin_classes)
        _start_rpc(loop, the_sink)
        logger.info("Message Bus started")
        # announce the start through the message bus
        agent_started = HookEvent.AgentStarted(
            version=Core.VERSION, resident=False
        )
        create_task_and_log_exceptions(
            loop, the_sink.process_message, agent_started
        )

        # note: plugins are started before the shutdown task has been setup
        #  therefore plugin.shutdown() won't be called before create_source()
        _setup_signal_handlers(
            loop, partial(_shutdown_task, loop, the_sink, sinks + sources)
        )
        # blocks until loop.stop() is called (see _shutdown_task)
        loop.run_forever()
        logger.info("loop stopped")
    finally:
        # closing the loop after loop.stop() cuts off pending tasks stacktraces
        loop.close()


def validate_configs_on_start(loop):
    """Validate the configuration layers; exit the process if they are bad.

    On a validation failure the ``AgentMisconfig`` hooks are executed on
    *loop*, the error is logged as a warning and the process exits with
    EXITCODE_GENERAL_ERROR.
    """
    try:
        ConfigsValidator.validate_config_layers()
    except Exception as exc:
        # imported only on the failure path
        from defence360agent.hooks.execute import execute_hooks

        misconfig_event = HookEvent.AgentMisconfig(error=repr(exc))
        hooks_run = execute_hooks(misconfig_event)
        loop.run_until_complete(hooks_run)
        logger.warning(str(exc))
        sys.exit(EXITCODE_GENERAL_ERROR)


def _setup_signal_handlers(loop, shutdowntask):
    """Start *shutdowntask* on the first SIGINT/SIGTERM/SIGUSR1/SIGUSR2.

    Signals received after the first one are only logged.
    """
    # falsy until the first signal arrives; afterwards it holds a reference
    # to the scheduled shutdown task, to keep the task alive, just in case
    shutdown_ref = False

    def _sighandler(loop, sig):
        nonlocal shutdown_ref
        if shutdown_ref:
            logger.info(
                "Caught %s. Shutdown task is already running, please wait.",
                sig,
            )
            return
        # set the flag before scheduling, then replace it with the task
        shutdown_ref = True
        logger.info("Caught %s", sig)
        shutdown_ref = create_task_and_log_exceptions(loop, shutdowntask)

    handled_signals = (
        signal.SIGINT,
        signal.SIGTERM,
        signal.SIGUSR1,
        signal.SIGUSR2,
    )
    for signum in handled_signals:
        loop.add_signal_handler(signum, _sighandler, loop, signum)


def parse_cli():
    """Parse the agent's command line (``sys.argv[1:]``).

    Returns an ``argparse.Namespace`` with the attributes ``verbose``
    (number of ``-v`` flags, 0 by default), ``daemon`` (bool), ``pidfile``
    (path, ``/var/run/imunify360.pid`` by default) and ``log_config``
    (filename or ``None``).
    """
    parser = argparse.ArgumentParser(description="Run imunify agent")
    parser.add_argument(
        "-v",
        dest="verbose",
        action="count",
        default=0,
        # adjacent literals are concatenated as-is, so each fragment has to
        # end with a space to keep the rendered help readable
        help=(
            "Level of logging. Each value corresponds to: "
            "1 - console only log level, "
            "2 - previous plus add network log, "
            "3 - all previous plus add process message log, "
            "4 - all previous plus add debug log"
        ),
    )
    parser.add_argument("--daemon", action="store_true", help="run as daemon")
    parser.add_argument(
        "--pidfile",
        default="/var/run/imunify360.pid",
        help="use with --daemon",
    )
    parser.add_argument("--log-config", help="logging config filename")
    return parser.parse_args(sys.argv[1:])


def _stop_pending_cleanup():
    """
    Reset to FOUND every file malware hit that is still in CLEANUP_STARTED
    """
    stuck_in_cleanup = MalwareHit.status == MalwareHitStatus.CLEANUP_STARTED
    is_file_hit = (
        MalwareHit.resource_type == MalwareScanResourceType.FILE.value
    )
    stuck_hits = MalwareHit.select().where(stuck_in_cleanup, is_file_hit)
    MalwareHit.set_status(stuck_hits, MalwareHitStatus.FOUND)