# Current File : //opt/imunify360/venv/lib64/python3.11/site-packages/imav/malwarelib/plugins/store.py
"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.


This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details.


You should have received a copy of the GNU General Public License
 along with this program.  If not, see <https://www.gnu.org/licenses/>.

Copyright © 2019 Cloud Linux Software Inc.

This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import functools
import glob
import json
import os
import pwd
import re
from collections import defaultdict
from enum import Enum
from logging import getLogger
from typing import Any, Union

import defence360agent.internals.logger
from defence360agent.api import inactivity
from defence360agent.contracts.messages import MessageType
from defence360agent.contracts.plugins import (
    MessageSink,
    MessageSource,
    expect,
)
from defence360agent.model.simplification import run_in_executor
from defence360agent.subsys.panels.hosting_panel import HostingPanel
from defence360agent.utils import Scope, nice_iterator
from imav.malwarelib.config import (
    CLEANUP,
    CLEANUP_ON_SCHEDULE,
    MalwareEvent,
    MalwareEventPostponed,
    MalwareHitStatus,
    MalwareScanResourceType,
    MalwareScanType,
    NOTIFY,
)
from imav.malwarelib.model import (
    MalwareHit,
    MalwareHitAlternate,
    MalwareScan as MalwareScanModel,
)
from imav.malwarelib.plugins.detached_scan import (
    MalwareScanMessageInfo,
)
from imav.malwarelib.scan.mds.report import MalwareDatabaseHitInfo
from imav.malwarelib.subsys.malware import (
    HackerTrapHitsSaver,
    MalwareAction,
    MalwareActionIm360,
)

logger = getLogger(__name__)


class MalwareScanJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``Enum`` members as their ``value``."""

    def default(self, o: Any) -> Any:
        """Return ``o.value`` for enum members, else defer to the base.

        The base implementation raises ``TypeError`` for objects it cannot
        serialize.
        """
        if not isinstance(o, Enum):
            return super().default(o)
        return o.value


class StoreMalwareHits(MessageSink, MessageSource):
    SCOPE = Scope.AV
    malware_action = MalwareAction
    _loop, _sink = None, None

    async def create_source(self, loop, sink):
        """Remember the event loop and the message sink for later use."""
        self._loop, self._sink = loop, sink

    async def create_sink(self, loop):
        """Do nothing: this class needs no sink-side initialisation."""
        return None

    @expect(MessageType.MalwareScan, async_lock=False)
    async def process_hits(self, message):
        """Save a MalwareScan message to the DB.

        A scan is saved when:
         1. a detached scan has started - the message has no results;
         2. any scan has finished - the message has summary and results.

        A message whose summary carries no ``path`` is skipped; it is
        expected to mark a finished detached scan whose summary arrives
        along with the results in another message.
        """
        if message["summary"].get("path"):
            # the whole storing step is tracked as a "store_scan" task
            with inactivity.track.task("store_scan"):
                await self._store_scan(message)

    @expect(MessageType.MalwareScan)
    async def store_log(self, message):
        """Dump the scan summary as indented JSON into the malware scan log."""
        with defence360agent.internals.logger.openMalwareScanLog() as log_file:
            payload = {"summary": message["summary"]}
            json.dump(
                payload,
                log_file,
                cls=MalwareScanJSONEncoder,
                indent=2,
                sort_keys=False,
            )

    @staticmethod
    def _store_hit(scanid, filename, status, resource_type, data):
        """Create a ``MalwareHit`` row for ``filename`` and return it.

        File attributes (owner, user, size, hash) are read from ``data``;
        signature, timestamp and the malicious flag come from the first
        entry of ``data["hits"]``.
        """
        record = {
            "scanid": scanid,
            "resource_type": resource_type,
            "owner": data["owner"],
            "user": data["user"],
            "size": data["size"],
            "hash": data["hash"],
            "orig_file": filename,
        }
        # only the first hit of the file describes the stored record
        first_hit = data["hits"][0]
        record["type"] = first_hit["matches"]
        record["timestamp"] = first_hit["timestamp"]
        record["status"] = status
        record["malicious"] = not first_hit["suspicious"]
        return MalwareHit.create(**record)

    @staticmethod
    def get_outdated_entries(
        path_obj: Union[str, list],
        scan_type: str = None,
    ):
        """
        Yield paths of stored hits that may no longer be infected, yet
        are still recorded with the FOUND status.

        For example, an infected file might have been removed manually.

        :param path_obj: a single path / glob pattern or a list of them
        :param scan_type: type of the scan; for realtime scans the given
            paths are yielded as is, without any lookup
        """
        targets = path_obj if not isinstance(path_obj, str) else [path_obj]
        if scan_type == MalwareScanType.REALTIME:
            # to avoid duplicates (DEF-10404)
            yield from targets
            return

        def _found_file_hits(path_condition):
            # FILE hits in the FOUND status narrowed down by path_condition
            return MalwareHit.select().where(
                path_condition
                & (MalwareHit.status.in_([MalwareHitStatus.FOUND]))
                & (
                    MalwareHit.resource_type
                    == MalwareScanResourceType.FILE.value
                )
            )

        for pattern in targets:
            for matched in glob.iglob(pattern):
                real_path = os.path.realpath(matched)
                if (
                    os.path.isfile(real_path)
                    and _found_file_hits(
                        MalwareHit.orig_file == real_path
                    ).first()
                ):
                    # an existing file that has a FOUND hit of its own
                    yield real_path
                    continue
                # otherwise look for hits stored under the scanned path
                # NOTE(review): the pattern is not anchored and "\b" also
                # matches before characters such as "-" or "."; confirm
                # that sibling paths (e.g. "<dir>-old/...") cannot match.
                subtree_regex = re.escape(real_path) + r"(/.*|\b)"
                for entry in _found_file_hits(
                    MalwareHit.orig_file.regexp(subtree_regex)
                ):
                    yield entry.orig_file

    async def _store_scan(self, message: MessageType.MalwareScan) -> None:
        """Store a scan described by a MalwareScan message.

        A summary-only message creates the scan row (unless it is already
        present); any other message is handed over to
        ``_store_scan_from_results``.

        message: MalwareScan message
        """
        summary = message["summary"]
        if not summary["started"]:
            # Scan is queued/aborted.
            return

        if not MalwareScanMessageInfo(message).is_summary:
            await self._store_scan_from_results(message)
            return

        scan_id = summary["scanid"]
        already_stored = (
            MalwareScanModel.select()
            .where(MalwareScanModel.scanid == scan_id)
            .exists()
        )
        if already_stored:
            logger.warning("Scan %s already in database", scan_id)
            return

        scan = MalwareScanModel.create(
            **summary,
            resource_type=MalwareScanResourceType.FILE.value,
            initiator=message.initiator,
        )
        # no results are known yet for a summary-only message
        scan.total_malicious = 0
        scan.save()

    @classmethod
    def _delete_outdated_entries(cls, summary: dict) -> None:
        """Delete outdated hits for the scanned path.

        ``file_patterns`` and ``exclude_patterns`` are always removed from
        ``summary`` (the dict is modified in place). Hits are deleted only
        when the scan reported no error and neither pattern was set.
        """
        patterns = [
            summary.pop(key, None)
            for key in ("file_patterns", "exclude_patterns")
        ]
        if summary.get("error") is not None:
            return
        if any(pattern is not None for pattern in patterns):
            return
        MalwareHit.delete_hits(
            cls.get_outdated_entries(
                summary["path"], scan_type=summary["type"]
            )
        )

    @staticmethod
    async def _process_default_action_results(
        hit_data, default_action_results
    ):
        """Hook called after hits are stored; does nothing in this class."""
        return None

    async def _store_scan_from_results(self, message: MessageType.MalwareScan):
        """Store a scan together with its results.

        Steps, in order:
         1. get or create the scan row (``completed`` is refreshed when
            the row already exists);
         2. delete outdated hit entries for the scanned path;
         3. drop results that lost a race with a cleanup of the same file;
         4. apply the default action to the non-suspicious results;
         5. store a FOUND hit for every remaining result, except those
            whose default action eliminated the malware;
         6. update scan totals and forward postponed events to the sink;
         7. call ``_process_default_action_results``.

        message: MalwareScan message with ``summary`` and ``results``
        """
        summary = message["summary"]
        results = message["results"]
        scan_id = summary["scanid"]
        scan, created = MalwareScanModel.get_or_create(
            scanid=scan_id,
            defaults={
                **summary,
                "resource_type": MalwareScanResourceType.FILE.value,
            },
        )
        if not created:
            # Detached scan only (second message).
            # Update completed time if scan already exists.
            scan.completed = summary["completed"]

        # get('path') indicates that this is the second message,
        # even if they are out of order
        if results is not None and summary.get("path") is not None:
            # note: this also pops "file_patterns" / "exclude_patterns"
            # from ``summary``
            self._delete_outdated_entries(summary)

        # NOTE(review): ``results`` is checked for None above but used
        # unconditionally from here on (``list(None)`` raises TypeError).
        hits = {
            hit.orig_file: hit
            for hit in MalwareHit.get_hits(files=list(results))
        }
        # postponed events grouped by (message class, event parameters)
        postponed_hits = defaultdict(list)  # type: dict
        total_malicious = 0

        def _hit_status_race_detected(hit: MalwareHit, detected_timestamp):
            """Tell whether the stored hit is being or was cleaned up.

            ``and`` binds tighter than ``or``: the ``cleaned_at`` comparison
            applies only to the CLEANUP_DONE / CLEANUP_REMOVED statuses,
            while CLEANUP_STARTED alone is enough to report a race.
            """
            return (
                hit.status == MalwareHitStatus.CLEANUP_STARTED
                or hit.status
                in (
                    MalwareHitStatus.CLEANUP_DONE,
                    MalwareHitStatus.CLEANUP_REMOVED,
                )
                and hit.cleaned_at > detected_timestamp
            )

        # Ignore hits that were already processed by another scan
        # to avoid sending them to CH multiple times.
        # The keys are copied into a tuple because ``results`` is modified
        # while iterating.
        async for file in nice_iterator(tuple(results.keys())):
            if file in hits and _hit_status_race_detected(
                hits[file], results[file]["hits"][0]["timestamp"]
            ):
                results.pop(file)

        # only results whose first hit is not suspicious get the default
        # action applied
        malicious_hits = [
            MalwareHitAlternate.create(scan.scanid, file, data)
            for file, data in results.items()
            if not data["hits"][0]["suspicious"]
        ]

        action_results = await self.malware_action.apply_default_action(
            hits=malicious_hits,
            initiator=message.get("initiator"),
            cause=summary["type"],
            sink=self._sink,
        )

        # path -> (event, action, try_restore) for quick lookup below
        apply_dict = {}
        for hit_info, event, action, try_restore in action_results:
            apply_dict[hit_info.orig_file] = (event, action, try_restore)

        for file, data in results.items():
            status = MalwareHitStatus.FOUND
            result = None
            if file in apply_dict:
                (
                    result,
                    default_action,
                    try_restore,
                ) = apply_dict[file]

                # sent to CH
                if (
                    isinstance(result, MalwareEventPostponed)
                    and result.action == CLEANUP_ON_SCHEDULE
                ):
                    # report to CH only well-known `cleanup` / `notify` actions
                    default_action = (
                        CLEANUP
                        if summary["type"] == MalwareScanType.BACKGROUND
                        else NOTIFY
                    )
                data["default_action"] = default_action
                data["try_restore"] = try_restore

                # every file the default action was applied to is counted,
                # including those eliminated right away
                total_malicious += 1

                if isinstance(result, MalwareEvent):
                    if result.malware_eliminated:
                        # no hit is stored for eliminated malware
                        continue

            hit = await run_in_executor(
                self._loop,
                functools.partial(
                    self._store_hit,
                    scan.scanid,
                    file,
                    status,
                    MalwareScanResourceType.FILE.value,
                    data,
                ),
            )

            if isinstance(result, MalwareEventPostponed):
                key = (
                    result.message,
                    (
                        result.cause,
                        result.initiator,
                        result.post_action,
                        result.action,
                    ),
                )
                postponed_hits[key].append(hit)

        scan.total_malicious = total_malicious
        scan.total_resources = summary["total_files"]
        if error := summary.get("error"):
            scan.error = error
        scan.save()

        if self._sink:
            # note: the loop rebinds the name ``hits`` (the dict built
            # above is not used past this point)
            for (
                (msg_cls, (cause, initiator, post_action, action)),
                hits,
            ) in postponed_hits.items():
                if (
                    action == CLEANUP_ON_SCHEDULE
                    and summary["type"] != MalwareScanType.BACKGROUND
                ):
                    logger.info(
                        "Skipping auto-cleanup because it's allowed for "
                        "scheduled scans only"
                    )
                else:
                    await self._sink.process_message(
                        msg_cls(
                            hits=hits,
                            scan_id=scan_id,
                            cause=cause,
                            initiator=initiator,
                            post_action=post_action,
                        )
                    )

        # subclass hook: gets the (filtered) results and path -> event map
        await self._process_default_action_results(
            results,
            {hit.orig_file: event for hit, event, _, _ in action_results},
        )


class StoreMalwareHitsIm360(StoreMalwareHits):
    SCOPE = Scope.IM360
    malware_action = MalwareActionIm360

    async def create_sink(self, loop):
        """Run the parent sink setup, then initialise HackerTrapHitsSaver."""
        await super().create_sink(loop)
        await HackerTrapHitsSaver.init()

    @staticmethod
    async def _process_default_action_results(
        hit_data, default_action_results
    ):
        """Do additional processing for malicious files.

        Only paths whose default action result is a ``MalwareEvent`` are
        considered. Paths with eliminated malware are passed to
        ``HackerTrapHitsSaver.add_hits``; paths having a hit that contains
        ``STANDALONE_MARK`` are passed to ``update_sa_hits``.
        """
        eliminated_paths = []
        standalone_paths = []

        for path, data in hit_data.items():
            event = default_action_results.get(path)
            if isinstance(event, MalwareEvent):
                if event.malware_eliminated:
                    eliminated_paths.append(path)

                for hit in data["hits"]:
                    if HackerTrapHitsSaver.STANDALONE_MARK in hit["matches"]:
                        # one marked hit is enough for the path
                        standalone_paths.append(path)
                        break

        await HackerTrapHitsSaver.add_hits(eliminated_paths)
        await HackerTrapHitsSaver.update_sa_hits(standalone_paths, [])

    @expect(MessageType.MalwareDatabaseScan)
    async def store_db_scan(self, message: MessageType.MalwareDatabaseScan):
        """Store a malware database scan and its hits.

        Creates the scan row, deletes outdated hits for the reported
        paths, applies the default action to all hits, stores a FOUND hit
        for each unique DB hit (unless the default action eliminated the
        malware) and forwards postponed events to the sink.

        message: MalwareDatabaseScan message
        """
        if not message.started or message.type is None:
            # Scan is queued/aborted or stopped while AVD is not finished yet
            return

        scan = MalwareScanModel.create(
            scanid=message.scan_id,
            started=message.started,
            completed=message.completed,
            type=message.type,
            path=message.path,
            error=message.error,
            total_resources=message.total_resources,
            total_malicious=message.total_malicious,
            resource_type=MalwareScanResourceType.DB.value,
            initiator=message.initiator,
        )
        if not message.hits:
            return
        # FIXME: remove this mapping
        # when we start to store UID instead of username in the db
        panel_users = set(await HostingPanel().get_users())
        uid_to_name = {
            pw.pw_uid: pw.pw_name
            for pw in pwd.getpwall()
            if pw.pw_name in panel_users
        }

        unique_db_hits = MalwareDatabaseHitInfo.get_hits_per_db(message.hits)
        self._delete_outdated_db_entries(unique_db_hits)

        # apply default action to all hits (to store them in history table)
        action_results = await self.malware_action.apply_default_action(
            hits=message.hits,
            initiator=message.get("initiator"),
            cause=message.get("type"),
            sink=self._sink,
            resource_type=MalwareScanResourceType.DB.value,
        )

        # path -> (event, action) for quick lookup below
        apply_dict = {}
        for hit, event, action, _ in action_results:
            apply_dict[hit.path] = (event, action)

        # postponed events grouped by (message class, event parameters)
        postponed_hits = defaultdict(list)  # type: dict

        for hit_info in unique_db_hits:
            result = None
            if hit_info.path in apply_dict:
                (
                    result,
                    default_action,
                ) = apply_dict[hit_info.path]

                # FIXME: DEF-18112 add default_action to hit and send to CH

                if isinstance(result, MalwareEvent):
                    if result.malware_eliminated:
                        # no hit is stored for eliminated malware
                        continue

            new_hit: MalwareHit = MalwareHit.create(
                scanid=scan,
                owner=uid_to_name.get(hit_info.owner, hit_info.owner),
                user=uid_to_name.get(hit_info.user, hit_info.user),
                orig_file=hit_info.path,
                type=hit_info.signature,
                malicious=True,
                hash=None,
                size=None,
                # was misspelled as "timestame", which the model accepted
                # silently as an unrelated attribute
                timestamp=None,
                status=MalwareHitStatus.FOUND,
                cleaned_at=None,
                resource_type=MalwareScanResourceType.DB.value,
                app_name=hit_info.app_name,
                db_host=hit_info.db_host,
                db_port=hit_info.db_port,
                db_name=hit_info.db_name,
                snippet=hit_info.snippet,
            )
            if isinstance(result, MalwareEventPostponed):
                key = (
                    result.message,
                    (result.cause, result.initiator, result.post_action),
                )
                postponed_hits[key].append(new_hit)

        if self._sink:
            for (
                (msg_cls, (cause, initiator, post_action)),
                hits,
            ) in postponed_hits.items():
                await self._sink.process_message(
                    msg_cls(
                        hits=hits,
                        scan_id=message.scan_id,
                        cause=cause,
                        initiator=initiator,
                        post_action=post_action,
                    )
                )

    @staticmethod
    def _delete_outdated_db_entries(hits):
        """Pass the ``path`` of every given hit to ``MalwareHit.delete_hits``."""
        MalwareHit.delete_hits([db_hit.path for db_hit in hits])