????
Current Path : /opt/imunify360/venv/lib64/python3.11/site-packages/imav/patchman/fs_scanner/ |
Current File : //opt/imunify360/venv/lib64/python3.11/site-packages/imav/patchman/fs_scanner/utils.py |
"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.

Copyright © 2019 Cloud Linux Software Inc.

This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import hashlib
import os
import re
from typing import Iterator, Optional

from .config import MAX_FILE_SIZE


def dir_traverse_generator(
    target_dir: str, max_size: int = MAX_FILE_SIZE
) -> Iterator[str]:
    """
    Yield the path of every file under target_dir whose size is <= max_size.

    The tree is walked with os.walk. Entries whose size cannot be read
    (dangling symlink, file removed while walking, permission error) are
    skipped instead of aborting the whole traversal.

    :param target_dir: directory to walk recursively
    :param max_size: largest file size, in bytes, that is still yielded
    :return: generator of file paths built as os.path.join(root, file)
    """
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            path = os.path.join(root, file)
            try:
                size = os.path.getsize(path)
            except OSError:
                # os.path.getsize raises OSError when the file does not
                # exist or is inaccessible; one bad entry must not stop
                # the scan of the remaining files.
                continue
            if size <= max_size:
                yield path


def all_possible_relative_paths(abs_path: str, root_dir: str = "/") -> list:
    """
    Return every trailing partial path of abs_path relative to root_dir.

    E.g., for ('/a/b/c/d.txt', '/a/b/') it returns ['c/d.txt', 'd.txt'].

    :param abs_path: absolute file path; a falsy value yields ['']
    :param root_dir: directory the path is made relative to; a falsy value
        is treated as "/"
    :return: list of partial paths, longest first
    """
    rel_path = os.path.relpath(abs_path, root_dir or "/") if abs_path else ""
    path_parts = rel_path.strip(os.sep).split(os.sep)
    return [os.sep.join(path_parts[i:]) for i in range(len(path_parts))]


def get_base_dir(abs_path: str, rel_path: str) -> str:
    """
    Return base_dir such that os.path.join(base_dir, rel_path) == abs_path.

    E.g. get_base_dir('/a/b/c/d.txt', 'c/d.txt') returns '/a/b/'.

    :param abs_path: absolute path that ends with rel_path
    :param rel_path: trailing part of abs_path
    :return: abs_path with the rel_path suffix removed
    :raises ValueError: if rel_path is not a suffix of abs_path
    """
    if not abs_path.endswith(rel_path):
        raise ValueError(
            f"rel_path '{rel_path}' is not a suffix of abs_path '{abs_path}'"
        )
    # An explicit end index is required: abs_path[:-len(rel_path)] turns into
    # abs_path[:0] == "" when rel_path is empty, dropping the whole path.
    return abs_path[: len(abs_path) - len(rel_path)]


class HashCalculator:
    """
    Compute hex digests of files with one of the supported algorithms.

    For "md5" the file content can optionally be normalized before hashing
    (see _normalize_data_for_md5); for any other algorithm the normalizer is
    the identity function.
    """

    # algorithm name -> hashlib constructor
    HASHING_ALGORITHMS = {
        "md5": hashlib.md5,
        "sha256": hashlib.sha256,
    }
    # number of bytes read from the file per hasher update
    BUFFER_SIZE = 8192
    # runs of space, TAB, LF, VT, FF and CR
    _consolidate_whitespace = re.compile(b"[\x20\x09-\x0d]+")
    # runs of bytes 0x00-0x08, 0x0E-0x1F and 0x7F-0xFF
    _remove_control_characters = re.compile(b"[\x00-\x08\x0e-\x1f\x7f-\xff]+")

    def __init__(self, algorithm: str):
        """
        :param algorithm: key of HASHING_ALGORITHMS ("md5" or "sha256")
        :raises KeyError: if the algorithm is not in HASHING_ALGORITHMS
        """
        self.algorithm = algorithm
        self._hasher_factory = self.HASHING_ALGORITHMS[algorithm]
        self._normalizer = (
            self._normalize_data_for_md5 if algorithm == "md5" else lambda x: x
        )

    @classmethod
    def _normalize_data_for_md5(cls, data: bytes) -> bytes:
        """
        This method normalizes binary data by:
        - Removing the bytes 0x00-0x08, 0x0E-0x1F and 0x7F-0xFF:
            - 0x00-0x08, 0x0E-0x1F are ASCII control characters minus TAB, LF,
              VT, FF and CR;
            - 0x7F is DEL and 0x80-0xFF are all the non-ASCII byte values.
        - Consolidating the clusters of whitespace characters
          (0x20, 0x09-0x0D) into single space character (0x20);
        - Turns all uppercase ASCII characters to lowercase.

        :param data: raw bytes to normalize
        :return: normalized bytes
        :raises TypeError: if data is not a bytes object
        """
        if not isinstance(data, bytes):
            raise TypeError("Normalization function expects bytes input")
        data = cls._remove_control_characters.sub(b"", data)
        data = cls._consolidate_whitespace.sub(b" ", data)
        return data.lower()

    def calc_hash(
        self, filepath: str, apply_normalization: bool = False
    ) -> Optional[str]:
        """
        Return the hex digest of the file content.

        :param filepath: path of the file to hash
        :param apply_normalization: when True, the content is passed through
            the normalizer selected in __init__ before hashing
        :return: hex digest string, or None if filepath is not a regular file
        """
        if not os.path.isfile(filepath):
            return None
        hasher = self._hasher_factory()
        with open(filepath, "rb") as file:
            if apply_normalization:
                # The normalizer needs the whole content at once: a run of
                # whitespace split across two chunks would otherwise be
                # consolidated into two spaces instead of one.
                hasher.update(self._normalizer(file.read()))
            else:
                # Stream the file so that it is never held in memory as a
                # whole; feeding chunks gives the same digest as a single
                # update with the concatenated data.
                for chunk in iter(lambda: file.read(self.BUFFER_SIZE), b""):
                    hasher.update(chunk)
        return hasher.hexdigest()