# Current File : //opt/imunify360/venv/lib64/python3.11/site-packages/imav/malwarelib/cleanup/cleaner.py
"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.


This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details.


You should have received a copy of the GNU General Public License
 along with this program.  If not, see <https://www.gnu.org/licenses/>.

Copyright © 2019 Cloud Linux Software Inc.

This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import asyncio
import json
import logging
import os
import subprocess
import tempfile
import time
from collections import defaultdict
from contextlib import suppress
from itertools import islice
from typing import (
    Callable,
    Dict,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    Union,
)

from defence360agent.contracts.config import (
    Malware,
    MalwareSignatures,
    MyImunifyConfig,
)
from defence360agent.contracts.messages import MessageType
from defence360agent.contracts.permissions import (
    ms_clean_requires_myimunify_protection,
)
from defence360agent.utils import (
    RecurringCheckStop,
    Singleton,
    base64_encode_filename,
    recurring_check,
)
from imav.contracts.config import MalwareTune
from imav.malwarelib.model import MalwareHit
from imav.malwarelib.utils.revisium import (
    RevisiumCSVFile,
    RevisiumJsonFile,
    RevisiumTempFile,
)

logger = logging.getLogger(__name__)


def cleaner_result_instance(tempdir=None, mode=None):
    """
    Build the temp-file object that will receive the cleaner's report.

    The class is selected by ``MalwareTune.USE_JSON_REPORT``:
    ``RevisiumJsonFile`` when it is truthy, ``RevisiumCSVFile`` otherwise.
    ``tempdir`` and ``mode`` are forwarded positionally, unchanged.
    """
    report_cls = (
        RevisiumJsonFile if MalwareTune.USE_JSON_REPORT else RevisiumCSVFile
    )
    return report_cls(tempdir, mode)


class MalwareCleanerLog(RevisiumTempFile):
    """
    Temp file whose path is handed to the cleaner as its ``--log`` target.

    Adds nothing to ``RevisiumTempFile``; the dedicated type only names the
    role of the file.
    """


class MalwareCleanerProgress(RevisiumJsonFile):
    """
    Progress file filled in by an external process and polled from here
    """

    # Last absolute "current" value that was read; lets watch() report
    # deltas instead of absolute numbers.
    _progress = 0

    @recurring_check(2)
    async def watch(self, callback):
        """
        Read the progress file once and pass the change to ``callback``.

        ``callback`` receives the difference between the freshly read
        ``"current"`` value and the one seen on the previous call.

        Raises ``RecurringCheckStop`` once the file no longer exists.
        Content that is not valid JSON is skipped silently.
        Presumably ``recurring_check(2)`` re-invokes this every 2 seconds
        until ``RecurringCheckStop`` is raised - confirm against the helper.
        """
        try:
            snapshot = self.read()
        except json.JSONDecodeError:
            # Unparsable content: skip this round and try again later.
            return
        except FileNotFoundError:
            # The file is gone, so there is nothing left to watch.
            raise RecurringCheckStop()

        # NOTE(review): a payload without "current" raises KeyError here.
        current = snapshot["current"]
        delta = current - self._progress
        self._progress = current

        callback(delta)


class MalwareCleanupFileList(RevisiumTempFile):
    """
    Temp file holding a list of file names, one encoded name per line
    """

    def write(self, filelist):
        """
        Overwrite the file with the entries of ``filelist``.

        Every entry goes through ``base64_encode_filename`` and is written
        in binary mode, terminated by a newline.
        """
        with self._path.open("wb") as out:
            for name in filelist:
                out.write(base64_encode_filename(name) + b"\n")


def _parse_int(value: Union[str, int]) -> int:
    """Convert str|int to int, return -2 when the value can't be converted

    -1 is used as default value when storing CH, so -2 marks a value that
    was present but unusable.

    Besides malformed strings (ValueError) this also covers values of an
    unexpected type such as None or a list (TypeError) and non-finite
    floats (OverflowError), so one bad report field never raises.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return -2


class CleanupResultEntry(dict):
    """
    Single record of the cleanup report, normalised into a plain dict.

    Keys:
      d - cleanup result
      e - error description
      s - signature that was triggered for the file during scan
      f - file path (it's unexpected that f is absent)
      r - the result of aibolit rescan after cleanup
    """

    def __init__(self, data: Dict[str, Union[str, int]]):
        # One broken record must not stop processing of the whole report,
        # so the numeric fields default to -1 when absent and become -2
        # when _parse_int can't convert them.
        # NOTE(review): "s" and "f" are still read with [], so a record
        # lacking either of them raises KeyError.
        fields = {
            "d": _parse_int(data.get("d", -1)),
            "e": _parse_int(data.get("e", -1)),
            "s": data["s"],
            "f": data["f"],
            "r": _parse_int(data.get("r", -1)),
        }
        super().__init__(fields)

    def is_cleaned(self):
        """Tell whether the file counts as successfully cleaned."""
        blocked = self.is_failed() or self.requires_myimunify_protection()
        if blocked:
            return False

        error_code = self["e"]
        if error_code == 4:
            # e == 4: the file has changed; treated as cleaned.
            logger.warning(
                "File has changed, assuming that it was cleaned: %s", self["f"]
            )
            return True

        return error_code == 0 and self["d"] == 0

    def is_removed(self):
        """True for a non-failed record with e == 0 and d > 0."""
        if self.is_failed():
            return False
        return self["e"] == 0 and self["d"] > 0

    def is_failed(self):
        """True when the rescan result (r) equals 1."""
        rescan = self["r"]
        return rescan == 1

    def requires_myimunify_protection(self):
        """True when the rescan result (r) equals 2."""
        rescan = self["r"]
        return rescan == 2

    def not_exist(self):
        """True for a non-failed record whose error code (e) is 5."""
        if self.is_failed():
            return False
        return self["e"] == 5


class CleanupResult(Dict[str, CleanupResultEntry]):
    """
    Mapping of file path to CleanupResultEntry, built from a cleaner report

    ``in`` and ``[]`` accept either the path itself or any object with an
    ``orig_file`` attribute (such as a MalwareHit). Only those two
    operations are overridden here.
    """

    def __init__(self, report=None):
        # A missing or empty report leaves the mapping empty.
        if not report:
            return

        entries = {}
        for record in report:
            # Read the key before building the entry, same as the
            # key-then-value order of a dict comprehension.
            path = record["f"]
            entries[path] = CleanupResultEntry(record)
        super().__init__(entries)

    @staticmethod
    def __key(hit: Union[str, MalwareHit]) -> str:
        # Use hit.orig_file when the attribute exists, else hit itself.
        return getattr(hit, "orig_file", hit)

    def __contains__(self, hit: Union[str, MalwareHit]):
        lookup_key = self.__key(hit)
        return super().__contains__(lookup_key)

    def __getitem__(self, hit: Union[str, MalwareHit]):
        lookup_key = self.__key(hit)
        return super().__getitem__(lookup_key)


class MalwareCleaner:
    """
    Runs the external cleanup tool over a list of files and reads its report

    ``start`` is the entry point: it prepares the temp files, launches
    ``/opt/ai-bolit/wrapper`` with ``PROCU_PATH`` and turns the result file
    into a ``CleanupResult``.
    """

    PROCU_PATH = "/opt/ai-bolit/procu2.php"
    PROCU_DB = MalwareSignatures.PROCU_DB

    # Strong references to progress-watcher tasks that are still running.
    # The event loop keeps only weak references to tasks, so a
    # fire-and-forget task has to be held somewhere until it is done.
    _progress_watchers: Set[asyncio.Task] = set()

    def __init__(self, loop=None, sink=None):
        """
        :param loop: event loop used to schedule the progress watcher,
            ``asyncio.get_event_loop()`` when not given
        :param sink: optional object whose ``process_message`` coroutine
            receives a ``CleanupFailed`` message when a cleanup fails
        """
        self._loop = loop if loop else asyncio.get_event_loop()
        self._proxy = MalwareCleanupProxy()
        self._sink = sink

    def _cmd(
        self,
        filename,
        progress_path,
        result_path,
        log_path,
        soft,
        *,
        username,
        blacklist=None,
        use_csv=True,
        standard_only=True,
    ):
        """
        Build the argument list for the cleanup tool.

        :param filename: path of the file with the names to clean
        :param progress_path: path passed as ``--progress``
        :param result_path: path passed as ``--csv_result`` or ``--result``
        :param log_path: path passed as ``--log``
        :param soft: add ``--soft`` when truthy
        :param username: value of ``--username``
        :param blacklist: path passed as ``--black-list``; the option is
            omitted when falsy
        :param use_csv: choose ``--csv_result`` (truthy) or ``--result``
        :param standard_only: add ``--standard-only`` when truthy
        :return: list of command line arguments
        """
        cmd = [
            "/opt/ai-bolit/wrapper",
            self.PROCU_PATH,
            "--deobfuscate",
            "--nobackup",
            "--forcibly_cleanup",
            "--rescan",
            "--list=%s" % filename,
            "--input-fn-b64-encoded",
            "--username=%s" % username,
        ]
        if blacklist:
            cmd.append("--black-list=%s" % blacklist)
        cmd.extend(
            [
                "--log=%s" % log_path,
                "--progress=%s" % progress_path,
            ]
        )
        if Malware.CLEANUP_DISABLE_CLOUDAV:
            cmd.append("--disable-cloudav")

        if use_csv:
            cmd.append("--csv_result=%s" % result_path)
        else:
            cmd.append("--result=%s" % result_path)

        if standard_only:
            cmd.append("--standard-only")

        # The signature database is passed only when the file is present.
        if os.path.exists(self.PROCU_DB):
            cmd.append("--avdb")
            cmd.append(self.PROCU_DB)
        if soft:
            cmd.append("--soft")

        return cmd

    @staticmethod
    def _get_cleaner_error_info(
        exc: Exception,
        cmd: List[str],
        returncode: int,
        stdout: Optional[bytes],
        stderr: Optional[bytes],
    ):
        """
        Describe a failed cleaner run as a dict.

        The output streams are decoded with ``errors="replace"``; ``None``
        becomes an empty string.
        """
        return dict(
            exception=exc.__class__.__name__,
            return_code=returncode,
            command=cmd,
            out=stdout.decode(errors="replace") if stdout is not None else "",
            err=stderr.decode(errors="replace") if stderr is not None else "",
        )

    async def _send_cleanup_failed_message(self, info: dict):
        """
        Send ``info`` plus a timestamp to the sink as ``CleanupFailed``.

        Does nothing without a sink. Failures while sending are logged and
        swallowed; cancellation is propagated.
        """
        if self._sink:
            try:
                msg = MessageType.CleanupFailed(
                    {**info, "timestamp": int(time.time())}
                )
                await self._sink.process_message(msg)
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.exception(
                    "Exception while sending CleanupFailed message"
                )

    async def start(
        self,
        user,
        filelist,
        soft=True,
        blacklist=None,
        standard_only=None,
    ) -> Tuple[CleanupResult, Optional[str], List[str]]:
        """
        Run the cleanup tool for ``filelist`` and return its parsed report.

        :param user: value passed to the tool as ``--username``
        :param filelist: file names to clean
        :param soft: forwarded to ``_cmd`` (``--soft``)
        :param blacklist: optional file names written to the black list
        :param standard_only: forwarded to ``is_standard_only``
        :return: ``(result, error, cmd)``; on success ``error`` is ``None``,
            on failure ``result`` is empty and ``error`` is ``repr(exc)``
        :raises asyncio.CancelledError: re-raised after asking the child
            process to terminate
        """
        tempdir = tempfile.gettempdir()
        result_file = cleaner_result_instance(tempdir=tempdir)
        use_csv = isinstance(result_file, RevisiumCSVFile)
        standard_only = self.is_standard_only(user, standard_only)

        with MalwareCleanupFileList(
            tempdir=tempdir, mode=0o644
        ) as flist, MalwareCleanupFileList(
            tempdir=tempdir, mode=0o644
        ) as blk, MalwareCleanerProgress(
            tempdir=tempdir
        ) as progress, result_file as result, MalwareCleanerLog(
            tempdir=tempdir
        ) as log:
            flist.write(filelist)
            if blacklist:
                blk.write(blacklist)

            # Keep a strong reference to the watcher until it finishes so
            # it can't be garbage-collected while still pending.
            watcher = self._loop.create_task(
                progress.watch(self._proxy.progress_cb)
            )
            self._progress_watchers.add(watcher)
            watcher.add_done_callback(self._progress_watchers.discard)

            # _cmd() skips --black-list for a falsy value, so one call
            # covers both the "with" and "without black list" cases.
            cmd = self._cmd(
                flist.filename,
                progress.filename,
                result.filename,
                log.filename,
                soft,
                username=user,
                blacklist=blk.filename if blacklist else None,
                use_csv=use_csv,
                standard_only=standard_only,
            )
            logger.debug("Executing %s", " ".join(cmd))

            out, err = b"", b""
            proc = None
            try:
                proc = await asyncio.subprocess.create_subprocess_exec(
                    *cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                out, err = await proc.communicate()
                report = result.read()
            except asyncio.CancelledError:
                if proc:
                    # The process may already be gone.
                    with suppress(ProcessLookupError):
                        proc.terminate()
                raise
            except Exception as exc:
                info = self._get_cleaner_error_info(
                    exc,
                    cmd,
                    proc.returncode if proc else 126,  # 126 - permission error
                    stdout=out,
                    stderr=err,
                )
                # Group errors by exit code on Sentry
                logger.error(
                    f"Cleanup failed exit_code={info.get('return_code')}: %s",
                    f"{info.get('out')} {info.get('err')}",
                    extra={**info, "exception": exc},
                )
                await self._send_cleanup_failed_message(
                    {**info, "message": str(exc)}
                )
                return CleanupResult(), repr(exc), cmd

            return CleanupResult(report), None, cmd

    @staticmethod
    def is_standard_only(user: str, standard_only: Optional[bool]) -> bool:
        """Check if only standard signatures should be applied for the user

        Currently always returns False: the early return below is a
        temporary switch (see FIXME) and the logic after it is parked
        until it is removed.
        """

        # FIXME: DEF-20763 Remove this line to enable standard signatures
        return False

        if not MyImunifyConfig.ENABLED:
            # Ignore standard_only value if MyImunify is disabled
            return False
        elif standard_only is None:
            # When cleaned by default action
            return not ms_clean_requires_myimunify_protection(user)

        return standard_only


class MalwareCleanupProxy(metaclass=Singleton):
    """
    Class to interconnect Cleanup status endpoint and Cleanup plugin
    """

    # Upper bound on the number of hits in one item yielded by flush()
    _CHUNK_SIZE = 10000

    def __init__(self):
        # current - progress reported so far (see progress_cb)
        # total   - number of hits handed out by flush()
        self.current = self.total = 0
        # Pending hits grouped by
        # (cause, initiator, post_action, scan_id, standard_only)
        self.hits = defaultdict(set)

    def add(self, cause, initiator, post_action, scan_id, standard_only, hits):
        """Queue ``hits`` under the group identified by the other arguments"""
        self.hits[
            (cause, initiator, post_action, scan_id, standard_only)
        ].update(hits)

    def flush(
        self,
    ) -> Iterator[Tuple[str, str, Callable, str, Optional[bool], Set]]:
        """
        Hand out all pending hits in chunks of at most ``_CHUNK_SIZE``.

        This is a generator; every item is
        ``(cause, initiator, post_action, scan_id, standard_only, hits)``.
        Hits that don't fit into the current chunk are put back under the
        same group and yielded by a later iteration. ``total`` grows by the
        size of each chunk just before it is yielded.
        """
        while self.hits:
            scan_info, hits = self.hits.popitem()

            all_hits = iter(hits)
            hits = set(islice(all_hits, self._CHUNK_SIZE))

            # Probe for one more element to learn whether anything is
            # left over; if so, return it and the rest to the queue.
            remaining_hit = next(all_hits, None)
            if remaining_hit is not None:
                self.hits[scan_info].add(remaining_hit)
                self.hits[scan_info].update(all_hits)

            self.total += len(hits)
            yield *scan_info, hits

    def progress_cb(self, increment=1):
        """Advance the progress counter by ``increment``"""
        self.current += increment

    def reset(self):
        """Zero both counters; pending hits are left untouched"""
        self.current = self.total = 0

    def get_progress(self):
        """
        Return the progress as an integer percentage, or None when there
        is nothing to measure against (denominator is zero).
        """
        # NOTE(review): len(self.hits) is the number of pending groups,
        # not of pending hits, while total counts hits - confirm that
        # mixing the two is intended.
        try:
            return int(self.current / (self.total + len(self.hits)) * 100)
        except ZeroDivisionError:
            return None