# Current File : //proc/self/root/opt/imunify360/venv/lib/python3.11/site-packages/defence360agent/api/newsfeed.py
"""
This module gets and caches news from imunify blog
"""
import asyncio
import os
import socket
import time
import urllib.request
from urllib.error import HTTPError
from xml.etree import ElementTree
from contextlib import suppress
from logging import getLogger

from defence360agent.simple_rpc.hosting_panel import HostingPanel
from defence360agent.utils import retry_on

# NOTE(review): the logger is named after this file's path (__file__),
# not after the module's dotted name (__name__).
logger = getLogger(__file__)

# Address of the RSS feed that gets downloaded and cached.
RSS_FEED_REMOTE_URL = "https://blog.imunify360.com/rss.xml"
# Ends up as the ``timeout`` argument of urllib.request.urlopen (seconds).
_TIMEOUT = 300  # default timeout for network operations here
# Child tags of an RSS <item> that are copied into each returned news entry.
TAGS_TO_READ = ["title", "pubDate", "guid", "link"]

# Public names of this module; HTTPError is the one imported from urllib.error.
__all__ = ["HTTPError", "NewsFeed"]


class NewsFeed:
    """Serve news items from the RSS feed through an on-disk cache.

    Every method is a classmethod; the only state is the cache file at
    ``cache_file_path``, whose modification time decides freshness.
    """

    cache_ttl = 60  # in minutes
    cache_file_path = "/var/imunify360/tmp/feed_cache.rss"

    # NOTE(review): retry_on is defined in defence360agent.utils and is not
    # visible here; presumably it re-runs the coroutine up to ``max_tries``
    # times on the listed exceptions and calls ``on_error`` in between --
    # confirm against its implementation.
    @classmethod
    @retry_on(
        (ElementTree.ParseError, urllib.request.URLError),
        max_tries=10,
        on_error=lambda *args: NewsFeed.clear_cache(*args),
    )
    async def get(cls):
        """Return the cached news items allowed for the current panel.

        The cache is refreshed first when ``_expired()`` says so.

        :return: list of dicts, one per RSS ``item`` accepted by
            ``PanelCategory.is_allowed``; each dict maps the child tags
            listed in ``TAGS_TO_READ`` to the child's text
        :raises xml.etree.ElementTree.ParseError: if the cached file is
            not well-formed XML (e.g. it is empty)
        """
        if cls._expired():
            await cls._refresh()

        category_info = PanelCategory(HostingPanel().NAME)
        # Read raw bytes so the XML parser decodes the document according
        # to its own encoding declaration instead of the process locale.
        with open(cls.cache_file_path, "rb") as cache_file:
            raw_feed = cache_file.read()

        root = ElementTree.fromstring(raw_feed)
        return [
            {
                child.tag: child.text
                for child in item
                if child.tag in TAGS_TO_READ
            }
            for item in root.iter("item")
            if category_info.is_allowed(item)
        ]

    @classmethod
    async def _refresh(cls):
        """Download the feed and overwrite the cache file with it."""
        # exist_ok avoids failing if the directory appears between a
        # separate existence check and the creation.
        os.makedirs(os.path.dirname(cls.cache_file_path), exist_ok=True)
        logger.info("Refresh news cache")
        # Download before opening the file: opening with "wb" truncates it,
        # so a failed download would otherwise leave an empty cache file
        # with a fresh modification time behind.
        content = await cls._fetch()
        with open(cls.cache_file_path, "wb") as cache_file:
            cache_file.write(content)

    @classmethod
    def _expired(cls):
        """Return True when the cache file is missing or too old.

        Age is measured from the file's modification time and compared
        with ``cache_ttl`` (minutes).
        """
        try:
            last_modified_time = os.path.getmtime(cls.cache_file_path)
        except OSError:
            # Missing (or otherwise unreadable) file counts as infinitely
            # old; asking directly avoids an exists()/getmtime() race.
            last_modified_time = 0
        cache_age = (time.time() - last_modified_time) / 60  # in minutes

        return cache_age > cls.cache_ttl

    @classmethod
    async def _fetch(cls, timeout=_TIMEOUT):
        """Fetch the remote feed in the default executor and return bytes.

        :param timeout: forwarded to ``_fetch_url``
        """
        # _fetch_url is blocking, so it is run in the loop's default
        # executor instead of being called directly from the coroutine.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, _fetch_url, RSS_FEED_REMOTE_URL, timeout
        )

    @classmethod
    async def clear_cache(cls, *args):
        """Delete the cache file; a missing file is not an error.

        :param args: only logged, as the reason for clearing the cache
        """
        logger.warning("Clearing cache due to error: %s", args)
        with suppress(FileNotFoundError):
            os.unlink(cls.cache_file_path)


def _fetch_url(url, timeout):
    """Download *url* and return the response body as bytes.

    :param url: address to request
    :param timeout: passed to ``urllib.request.urlopen`` as ``timeout``
    :return: the bytes read from the response
    :raises TimeoutError: when ``socket.timeout`` is raised while opening
        or reading the response; the original exception is attached as
        the cause
    """
    # Cloudflare Browser Integrity Check blocks the default urllib
    # User-Agent. RSS feed URL was added to exceptions but they are
    # not free, so let's set a custom User-Agent anyway.
    headers = {"User-Agent": "imunify360-urllib/0.1"}
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.read()
    except socket.timeout as err:
        # Keep the exception type callers already see, but say what timed
        # out and chain the low-level error explicitly.
        raise TimeoutError(f"timed out while fetching {url}") from err


class PanelCategory:
    """Decide whether an RSS item is relevant for a given hosting panel.

    ``current`` is the category of the panel passed to the constructor and
    ``competitors`` is every other known category.
    """

    # RSS news categories, value saved in xml category tag
    # categories are case-insensitive so lowercase it
    panel_categories = {"cpanel", "plesk", "directadmin"}
    no_panel_category = "standalone-imunify"

    def __init__(self, p_name):
        """Pick the category matching *p_name* (case-insensitive).

        :param p_name: panel name; anything not in ``panel_categories``
            is treated as ``no_panel_category``
        """
        p_name = p_name.lower()
        self.current = (
            p_name
            if p_name in PanelCategory.panel_categories
            else PanelCategory.no_panel_category
        )
        # Build the full set first: ``-`` binds tighter than ``|``, so
        # ``a | b - c`` would subtract from ``b`` only and leave the
        # current panel among its own competitors.
        all_categories = PanelCategory.panel_categories | {
            PanelCategory.no_panel_category
        }
        self.competitors = all_categories - {self.current}

    def is_allowed(self, item):
        """Return False only for items aimed at other panels.

        :param item: iterable of elements with ``tag`` and ``text``
            attributes (an RSS ``item`` element)
        :return: True unless a competitor category is mentioned and the
            current one is not
        """
        # Skip empty <category/> elements: their text is None and would
        # break the join below.
        item_categories = {
            child.text
            for child in item
            if child.tag == "category" and child.text
        }
        # category tag can include not only exact panel name, but also some
        # phrase for SEO purpose, so check it by `in` on joined string
        joined_category = "|||".join(item_categories).lower()

        current_in_category = self.current in joined_category
        competitors_in_category = any(
            com in joined_category for com in self.competitors
        )
        # the current panel is not mentioned in the categories,
        # but a competitor is -> don't add to result
        return not competitors_in_category or current_in_category