# Source: /opt/imunify360/venv/lib64/python3.11/site-packages/imav/patchman/fs_scanner/matcher.py
""" This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Copyright © 2019 Cloud Linux Software Inc. This software is also available under ImunifyAV commercial license, see <https://www.imunify360.com/legal/eula> """ import abc import json import os from collections import defaultdict from dataclasses import dataclass, field from typing import NamedTuple, Optional from .db import DB, PatchDependencyMatch, VersionMatch from .utils import HashCalculator, get_base_dir class FileIdentifier(NamedTuple): rel_path: str hash: str vuln_id: Optional[int] = None vuln_type: Optional[int] = None @dataclass class VersionIdentifier: id: int hash: str file_identifiers: list[FileIdentifier] # one identifier can match multiple base_dirs, need to keep track of them to avoid duplicate scanning matched_base_dirs: set = field(default_factory=set) @dataclass class PatchDependency: files: list[FileIdentifier] class Matcher(abc.ABC): def __init__(self, input_file: str): self.dict_of_identifiers = self._parse_input(input_file) @abc.abstractmethod def _parse_input(self, file_path: str) -> dict[str, list[tuple]]: pass class VersionsMatcher(Matcher): @staticmethod def _parse_path_hash_pairs(file_hashes: str) -> list[FileIdentifier]: # accepts file_hashes string like [<file_hash>|<file_path>|]*n # returns list of FileIdentifier objects parts = file_hashes.strip().split("|") return [ FileIdentifier(rel_path, hash_) for rel_path, hash_ in zip(parts[1::2], parts[::2]) ] def _parse_line(self, line: str) -> Optional[VersionIdentifier]: # each line is made up as <state>:<id>:<reporting_hash>:[file_hashes] # and <file_hashes> is a list of "<file_hash>|<file_path>|" pairs state, id_, hash_, file_hashes = line.strip().split(":") if state != "+": return None return VersionIdentifier( id_, hash_, self._parse_path_hash_pairs(file_hashes) ) def _parse_input( self, file_path: str ) -> dict[str, list[VersionIdentifier]]: # reads file version_identifiers with contents like # +:10831:38ed3878c51c61af938cd4fd9228b23b:ad8d2ec0797fbe584a2f5c1e0985b188|classes/Product.php|e890fa7432bbe7bee4dcbbff1009ca4b|app/AppKernel.php| plugins_identifiers_by_path: dict[ str, list[VersionIdentifier] ] = defaultdict(list) with open(file_path, "r") as file: for line in file: if new_identifier := self._parse_line(line): plugins_identifiers_by_path[ new_identifier.file_identifiers[0].rel_path ].append(new_identifier) return plugins_identifiers_by_path def has_full_match( self, plugin_identifier: VersionIdentifier, base_dir: str, hash_calculator: HashCalculator, ) -> bool: # 1) check that all files from file_identifiers exist in their paths relative to base_dir for file_identifier in plugin_identifier.file_identifiers: if not os.path.isfile( os.path.join(base_dir, file_identifier.rel_path) ): return False # 2) all files exist, now check their hashes for file_identifier in plugin_identifier.file_identifiers: if ( hash_calculator.calc_hash( os.path.join(base_dir, file_identifier.rel_path), apply_normalization=True, 
                )
                != file_identifier.hash
            ):
                return False
        return True

    def match_and_save(
        self,
        full_path: str,
        relative_path: str,
        db: DB,
        hash_calculator: HashCalculator,
    ):
        is_matched = False
        # check if we have any version_identifier matching given path
        for plugin_identifier in self.dict_of_identifiers.get(
            relative_path, []
        ):
            base_dir = get_base_dir(full_path, relative_path)
            # skip if we have already matched this base_dir with this
            # plugin_identifier
            if (
                base_dir not in plugin_identifier.matched_base_dirs
                and self.has_full_match(
                    plugin_identifier, base_dir, hash_calculator
                )
            ):
                plugin_identifier.matched_base_dirs.add(base_dir)
                db.versions_matches.buffered_insert(
                    VersionMatch(
                        id=plugin_identifier.id,
                        path=base_dir,
                        hash=plugin_identifier.hash,
                    )
                )
                is_matched = True
        return is_matched


class PatchDependenciesMatcher(Matcher):
    def _parse_input(
        self, file_path: str
    ) -> dict[str, list[PatchDependency]]:
        # read patch_dependencies file
        # each line represents a patch dependency and is made of a list of FileToPatch objects, like:
        # +[{"hash": "(...)", "checksum": "(...)", "vulnerability_type": 10, "vulnerability_id": 4346, \
        #   "filename": "popup-builder/com/helpers/AdminHelper.php"}, \
        #  {"hash": "(...)", "checksum": "(...)", "vulnerability_type": 10, "vulnerability_id": 4347, \
        #   "filename": "popup-builder/com/classes/Ajax.php"}]
        # we should consider only those lines starting with "+"
        patch_deps: dict[str, list[PatchDependency]] = defaultdict(list)
        with open(file_path, "r") as file:
            for line in file:
                state, data = line[0], line[1:]
                if state != "+":
                    continue
                patch_dependency = PatchDependency(
                    files=[
                        FileIdentifier(
                            rel_path=_["filename"],
                            hash=_["hash"],
                            vuln_id=_["vulnerability_id"],
                            vuln_type=_["vulnerability_type"],
                        )
                        for _ in json.loads(data)
                    ]
                )
                for file_identifier in patch_dependency.files:
                    patch_deps[file_identifier.rel_path].append(
                        patch_dependency
                    )
        return patch_deps

    def match_and_save(
        self,
        full_path: str,
        relative_path: str,
        db: DB,
        hash_calculator: HashCalculator,
    ):
        is_matched = False
        for patch_dependency in self.dict_of_identifiers.get(
            relative_path, []
        ):
            base_dir = get_base_dir(full_path, relative_path)
            # for each matching file add PatchDependencyMatch to db
            # if all files of patch_dependency are found, set
            # dependencies_met=True for all of them
            matches_to_insert = []  # [(path, hash, vuln_id, vuln_type), ...]
            for file_identifier in patch_dependency.files:
                if (
                    file_identifier.rel_path == relative_path
                    and hash_calculator.calc_hash(
                        os.path.join(base_dir, file_identifier.rel_path),
                    )
                    == file_identifier.hash
                ):
                    # todo: fix duplicates in PatchDependencyMatch table:
                    # add a constraint in the table and make a common dict
                    # for all the file_identifiers to eliminate duplicates
                    # in ram
                    matches_to_insert.append(
                        (
                            os.path.join(base_dir, file_identifier.rel_path),
                            file_identifier.hash,
                            file_identifier.vuln_id,
                            file_identifier.vuln_type,
                        )
                    )
                    is_matched = True
            # if all files matched, set dependencies_met=True
            matches_to_insert = [
                PatchDependencyMatch(
                    *row,
                    dependencies_met=(
                        len(matches_to_insert) == len(patch_dependency.files)
                    ),
                )
                for row in matches_to_insert
            ]
            for match in matches_to_insert:
                db.patch_dependencies.buffered_insert(match)
        return is_matched
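

# ---------------------------------------------------------------------------
# Illustrative driver (a sketch, not part of the original module).
#
# Both VersionsMatcher and PatchDependenciesMatcher key dict_of_identifiers
# by a path *relative to the application base directory*, e.g.
# "classes/Product.php", so a caller that only knows the absolute path of a
# scanned file has to pick a relative_path to look up. The suffix-trying
# strategy below is an assumption about how such a caller could do that;
# how the real fs_scanner derives relative_path, and how DB and
# HashCalculator instances are constructed, is defined elsewhere in this
# package and is not shown here.
# ---------------------------------------------------------------------------
def _try_match_suffixes_example(
    matcher: Matcher,
    full_path: str,
    db: DB,
    hash_calculator: HashCalculator,
) -> bool:
    # For /var/www/shop/classes/Product.php this tries "Product.php",
    # "classes/Product.php", "shop/classes/Product.php", ... as
    # relative_path; match_and_save() simply finds nothing for keys that
    # are absent from matcher.dict_of_identifiers.
    parts = full_path.strip("/").split("/")
    matched = False
    for i in range(len(parts) - 1, -1, -1):
        relative_path = "/".join(parts[i:])
        matched |= matcher.match_and_save(
            full_path, relative_path, db, hash_calculator
        )
    return matched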