From 9bc6f8d9e1822086076d455f50915a075d6b653a Mon Sep 17 00:00:00 2001
From: Darryl Nixon
Date: Sun, 16 Jul 2023 09:30:36 -0700
Subject: [PATCH] MVP for testing

---
 .flake8                          |   3 +
 melamine/classes.py              | 124 +++++++++++++++++++++++++++++++
 melamine/cli.py                  |  56 ++++++++++++++
 melamine/fileops.py              |  50 +++++++++++++
 melamine/filesystems/__init__.py |   7 ++
 melamine/filesystems/ext23.py    |  78 +++++++++++++++++++
 melamine/filesystems/zfs.py      |  25 +++++++
 melamine/logs.py                 |  35 +++++++++
 melamine/shred.py                |  43 +++++++++++
 melamine/validators.py           |  30 ++++++++
 pyproject.toml                   |  11 ++-
 11 files changed, 459 insertions(+), 3 deletions(-)
 create mode 100644 .flake8
 create mode 100644 melamine/classes.py
 create mode 100644 melamine/fileops.py
 create mode 100644 melamine/filesystems/__init__.py
 create mode 100644 melamine/filesystems/ext23.py
 create mode 100644 melamine/filesystems/zfs.py
 create mode 100644 melamine/logs.py
 create mode 100644 melamine/shred.py
 create mode 100644 melamine/validators.py

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..dd0767d
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 160
+exclude = docs/*, .git, __pycache__, build
diff --git a/melamine/classes.py b/melamine/classes.py
new file mode 100644
index 0000000..b2ddf66
--- /dev/null
+++ b/melamine/classes.py
@@ -0,0 +1,124 @@
+import asyncio
+import hashlib
+from collections.abc import Generator
+from pathlib import Path
+from secrets import token_bytes
+from typing import List
+from typing import Union
+
+import aiofiles
+
+from .fileops import find_mount
+from .logs import logger
+
+
+class ShredDir:
+    """Class for tracking each directory to be shredded, and its contents."""
+
+    def __init__(self, path: Path) -> None:
+        self.absolute_path = path.resolve()
+        self.processed = False
+        self.contents = self._get_contents()
+        self.byte_size = sum(item.byte_size for item in self.contents)
+        self.mount_points = set(m for m in self.get_mount_points())
+        self.mount_points.add(find_mount(self.absolute_path))
+        self.fs_handler = None
+
+    def _get_contents(self) -> List:
+        contents = []
+        for subpath in self.absolute_path.glob("*"):
+            if subpath.is_dir():
+                if subpath.is_symlink():
+                    logger.warning(f"Symlink subdirectory found: {subpath}, skipping")
+                    continue
+                contents.append(ShredDir(subpath))
+            elif subpath.is_file():
+                contents.append(ShredFile(subpath))
+        return contents
+
+    def get_mount_points(self) -> Generator:
+        for item in self.contents:
+            if isinstance(item, ShredDir):
+                yield from item.get_mount_points()
+            else: yield item.mount_point
+
+    async def shred(self, hash: bool = False, dryrun: bool = False) -> bool:
+        tasks = []
+        for item in self.contents:
+            tasks.append(item.shred(hash, dryrun))
+        return all(await asyncio.gather(*tasks))
+
+    def __hash__(self) -> int:
+        return hash(self.absolute_path)
+
+
+class ShredFile:
+    """Class for tracking each file to be shredded."""
+
+    def __init__(self, path: Path) -> None:
+        self.absolute_path = path.resolve()
+        self.byte_size = path.stat().st_size
+        self.processed = False
+        self.mount_point = find_mount(self.absolute_path)
+        self.fs_handler = None
+        self.hardlinks = None
+
+    async def shred(self, hash: bool = False, dryrun: bool = False) -> Union[bool, bytes]:
+        """Shred the file with a single file descriptor."""
+        if self.processed:
+            logger.error(f"File {self.absolute_path} was already shredded.")
+            return False
+        try:
+            logger.info(f"Shredding file: {self.absolute_path}")
+
+            async with aiofiles.open(self.absolute_path, "rb+") as file:
+                if hash:
+                    sha1 = hashlib.sha1(usedforsecurity=False)
+                    while chunk := await file.read(65536):
+                        sha1.update(chunk)
+                    self.sha1 = sha1.digest()
+                    logger.info(f"Got hash {sha1.hexdigest()}")
+
+                # First pass: Overwrite with binary zeroes
+                logger.info("Performing first pass: Overwriting with binary zeroes")
+                await file.seek(0)
+                if not dryrun:
+                    await file.write(b"\x00" * self.byte_size)
+                    await file.flush()
+
+                # Second pass: Overwrite with binary ones
+                logger.info("Performing second pass: Overwriting with binary ones")
+                await file.seek(0)
+                if not dryrun:
+                    await file.write(b"\xff" * self.byte_size)
+                    await file.flush()
+
+                # Third pass: Overwrite with random data
+                logger.info("Performing third pass: Overwriting with random data")
+                await file.seek(0)
+                random_data = token_bytes(self.byte_size)
+                if not dryrun:
+                    await file.write(random_data)
+                    await file.flush()
+
+            # Remove the file
+            logger.info(f"Removing file {self.absolute_path}")
+
+            if not dryrun:
+                self.absolute_path.unlink()
+
+            # Remove any hardlinks
+            if self.hardlinks:
+                logger.info(f"Removing {len(self.hardlinks)} hardlinks")
+                if not dryrun:
+                    for link in self.hardlinks:
+                        link.unlink()
+            self.processed = True
+            return True
+
+        except Exception as e:
+            logger.error(f"File wipe failed: {e}")
+            return False
+
+    def __hash__(self) -> int:
+        return hash(self.absolute_path)
diff --git a/melamine/cli.py b/melamine/cli.py
index e69de29..d2dda72 100644
--- a/melamine/cli.py
+++ b/melamine/cli.py
@@ -0,0 +1,56 @@
+import asyncio
+from argparse import ArgumentParser
+
+import uvloop
+
+from .shred import main
+from .validators import *
+
+
+# flake8: noqa: E501
+def run() -> None:
+    validate_environment()
+
+    parser = ArgumentParser(description="Comprehensive DoD 5220.22-M file shredder for Linux.")
+    parser.add_argument(
+        "--recursive", "-r", action="store_true", help="Process directories recursively. Default is false."
+    )
+    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts. Default is false.")
+    parser.add_argument(
+        "--dryrun", "-d", action="store_true", help="Provide mock output without deleting anything. Default is false."
+    )
+    parser.add_argument(
+        "--exhaustive",
+        "-e",
+        action="store_true",
+        help="Exhaustively check local mounts for duplicate files by hash. Default is false.",
+    )
+    parser.add_argument(
+        "--ignoredir",
+        "-i",
+        action="append",
+        type=validate_file_folder,
+        default=[],
+        help="Specify directories to be ignored during the process. This option can be used multiple times.",
+    )
+    parser.add_argument("--logfile", "-o", type=validate_logfile, help="Specify a file to log all output.")
+    parser.add_argument("--quiet", "-q", action="store_true", help="Silence all output.")
+    parser.add_argument("--verbose", "-v", action="store_true", help="Provide extra output for debugging.")
+    parser.add_argument(
+        "paths",
+        nargs="+",
+        type=validate_file_folder,
+        help="Specify any number of existing files or directories to be processed.",
+    )
+    args = parser.parse_args()
+
+    if sys.version_info >= (3, 11):
+        with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
+            runner.run(main(args))
+    else:
+        uvloop.install()
+        asyncio.run(main(args))
+
+
+if __name__ == "__main__":
+    run()
diff --git a/melamine/fileops.py b/melamine/fileops.py
new file mode 100644
index 0000000..6b3fc35
--- /dev/null
+++ b/melamine/fileops.py
@@ -0,0 +1,50 @@
+import asyncio
+from pathlib import Path
+from typing import List
+
+from asyncstdlib.functools import lru_cache
+
+from .filesystems import FSHandlers
+from .logs import logger
+
+
+def find_mount(path: Path) -> Path:
+    """Find the mount point for a given path."""
+    path = path.absolute()
+    while not path.is_mount():
+        path = path.parent
+    return path
+
+
+def get_all_mounts() -> List:
+    """Get a list of all mounted filesystems."""
+    mounts = []
+    with open("/proc/mounts", "r") as f:
+        for line in f:
+            mount = line.split()[1]
+            mounts.append(mount)
+    return mounts
+
+
+@lru_cache(maxsize=1024)
+async def mount_to_fs_handler(path: Path) -> str:
+    # TODO: This is a hacky way to get the filesystem type, but it works for now.
+    # Maybe with libblkid Python bindings?
+    proc = await asyncio.create_subprocess_exec(
+        "stat", "-f", "-L", "-c", "%T", str(path), stdout=asyncio.subprocess.PIPE, stdin=asyncio.subprocess.PIPE
+    )
+    stdout, _ = await proc.communicate()
+
+    if proc.returncode != 0:
+        err = f"Unable to get filesystem for {path}"
+        logger.error(err)
+        raise RuntimeError(err)
+
+    fs = stdout.decode().strip()
+
+    try:
+        return FSHandlers[fs]
+    except KeyError:
+        err = f"Unsupported filesystem: {fs}"
+        logger.error(err)
+        raise RuntimeError(err)
diff --git a/melamine/filesystems/__init__.py b/melamine/filesystems/__init__.py
new file mode 100644
index 0000000..a2bad44
--- /dev/null
+++ b/melamine/filesystems/__init__.py
@@ -0,0 +1,7 @@
+from .ext23 import EXT23Handler
+from .zfs import ZFSHandler
+
+# from .btrfs import BTRFSHandler
+# from .ext4 import EXT4Handler
+
+FSHandlers = {"zfs": ZFSHandler(), "ext2/ext3": EXT23Handler()}
diff --git a/melamine/filesystems/ext23.py b/melamine/filesystems/ext23.py
new file mode 100644
index 0000000..bb82d2d
--- /dev/null
+++ b/melamine/filesystems/ext23.py
@@ -0,0 +1,78 @@
+import ctypes
+from collections.abc import Generator
+from pathlib import Path
+
+
+class ext2_filsys(ctypes.Structure):
+    pass
+
+
+class ext2_inode_scan(ctypes.Structure):
+    pass
+
+
+class ext2_inode_large(ctypes.Structure):
+    _fields_ = [
+        ("i_mode", ctypes.c_uint16),
+        ("i_uid", ctypes.c_uint16),
+        ("i_size", ctypes.c_uint32),
+        ("i_atime", ctypes.c_uint32),
+        ("i_ctime", ctypes.c_uint32),
+        ("i_mtime", ctypes.c_uint32),
+        ("i_dtime", ctypes.c_uint32),
+        ("i_gid", ctypes.c_uint16),
+        ("i_links_count", ctypes.c_uint16),
+        ("i_blocks", ctypes.c_uint32),
+        ("i_flags", ctypes.c_uint32),
+        ("i_osd1", ctypes.c_uint32 * 3),
+        ("i_block", ctypes.c_uint32 * 15),
+        ("i_generation", ctypes.c_uint32),
+        ("i_file_acl", ctypes.c_uint32),
+        ("i_dir_acl", ctypes.c_uint32),
+        ("i_faddr", ctypes.c_uint32),
+        ("i_osd2", ctypes.c_uint8 * 12),
+    ]
+
+
+class ext2_inode_large_p(ctypes.POINTER(ext2_inode_large)):
+    pass
+
+
+class EXT23Handler:
+    def __init__(self) -> None:
+        self.fs = "ext2/ext3"
+        self.libext2fs = ctypes.CDLL("libext2fs.so.2")
+        self.libext2fs.ext2fs_open.restype = ctypes.c_int
+        self.libext2fs.ext2fs_open.argtypes = [
+            ctypes.c_char_p,
+            ctypes.c_int,
+            ctypes.c_int,
+            ctypes.c_uint32,
+            ctypes.POINTER(ext2_filsys),
+        ]
+        self.libext2fs.ext2fs_close.argtypes = [ext2_filsys]
+        self.libext2fs.ext2fs_get_next_inode.argtypes = [ext2_inode_scan, ext2_inode_large_p]
+        self.libext2fs.ext2fs_get_next_inode.restype = ctypes.c_int
+
+    async def get_hardlinks(self, path: Path) -> Generator:
+        path = path.resolve().absolute()
+        inode = path.stat().st_ino
+
+        fs = ext2_filsys()
+        ret = self.libext2fs.ext2fs_open(str(path).encode(), 0, 0, 0, ctypes.byref(fs))
+        if ret != 0:
+            return
+
+        scan = ext2_inode_scan()
+        ret = self.libext2fs.ext2fs_open_inode_scan(fs, ctypes.byref(scan))
+        if ret != 0:
+            self.libext2fs.ext2fs_close(fs)
+            return
+
+        inode_large = ext2_inode_large()
+        while self.libext2fs.ext2fs_get_next_inode(scan, ctypes.byref(inode_large)) == 0:
+            if inode_large.i_links_count > 1 and inode_large.i_file_acl == inode:
+                yield Path(fs.fs_mount_point) / scan.name.decode()
+
+        self.libext2fs.ext2fs_close_inode_scan(scan)
+        self.libext2fs.ext2fs_close(fs)
diff --git a/melamine/filesystems/zfs.py b/melamine/filesystems/zfs.py
new file mode 100644
index 0000000..8b4c6c6
--- /dev/null
+++ b/melamine/filesystems/zfs.py
@@ -0,0 +1,25 @@
+from collections.abc import Generator
+from pathlib import Path
+
+import pyzfs
+
+
+class ZFSHandler:
+    def __init__(self) -> None:
+        self.fs = "zfs"
+
+    async def get_hardlinks(self, path: Path) -> Generator:
+        path = path.resolve().absolute()
+        inode = path.stat().st_ino
+
+        zfs = pyzfs.ZFS()
+        dataset = zfs.get_dataset_by_path(str(path))
+        if dataset is not None:
+            pool = dataset.pool
+            filesystem = dataset.filesystem
+            fs = pool.open(filesystem)
+
+            for snapshot in fs.snapshots():
+                for entry in snapshot.ls(str(path)):
+                    if entry.inode() == inode:
+                        yield Path(entry.path())
diff --git a/melamine/logs.py b/melamine/logs.py
new file mode 100644
index 0000000..b6006ad
--- /dev/null
+++ b/melamine/logs.py
@@ -0,0 +1,35 @@
+from loguru import logger
+
+
+logger.add(
+    "app.log",
+    format="{time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="INFO",
+    rotation="1 day",
+    retention="30 days",
+)
+
+logger.add(
+    "errors.log",
+    format="⚠ī¸ {time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="WARNING",
+    rotation="1 day",
+    retention="30 days",
+)
+
+logger.add(
+    "error.log",
+    format="⛔ī¸ {time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="ERROR",
+    rotation="1 day",
+    retention="30 days",
+)
+
+
+logger.add(
+    "critical.log",
+    format="🚨 {time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="CRITICAL",
+    rotation="1 day",
+    retention="30 days",
+)
diff --git a/melamine/shred.py b/melamine/shred.py
new file mode 100644
index 0000000..375c0fc
--- /dev/null
+++ b/melamine/shred.py
@@ -0,0 +1,43 @@
+from .classes import ShredDir
+from .classes import ShredFile
+from .fileops import mount_to_fs_handler as mount_to_fs
+from .logs import logger
+
+
+async def main(job) -> None:
+    """
+    This is the main function for processing a shred request.
+    It is called by the CLI and builds a job queue based on the arguments passed.
+    """
+    new_paths = set()
+
+    # Expand all directories and files, and collect mount point information
+    for path in job.paths:
+        if path.is_file():
+            logger.info(f"Adding file: {path}")
+            shred_file = ShredFile(path)
+            shred_file.fs_handler = await mount_to_fs(shred_file.mount_point)
+            new_paths.add(shred_file)
+        elif path.is_dir():
+            if job.recursive:
+                logger.info(f"Adding directory: {path}")
+                shred_dir = ShredDir(path)
+                shred_dir.fs_handler = await mount_to_fs(next(iter(shred_dir.mount_points)))
+                new_paths.add(shred_dir)
+            else:
+                logger.info(f"Skipping directory: {path} (try -r/--recursive)")
+        else:
+            raise TypeError(f"Not a file or directory: {path}")
+    job.paths = new_paths
+
+    # Get hardlinks to subsequently unlink for all files
+    for path in job.paths:
+        if isinstance(path, ShredFile):
+            path.hardlinks = set(link async for link in path.fs_handler.get_hardlinks(path.absolute_path))
+
+    # Shred all physical files including hardlinks
+    for path in job.paths:
+        if isinstance(path, ShredFile):
+            await path.shred(getattr(job, "hash", False), job.dryrun)
+        elif isinstance(path, ShredDir):
+            await path.shred(getattr(job, "hash", False), job.dryrun)
diff --git a/melamine/validators.py b/melamine/validators.py
new file mode 100644
index 0000000..c300069
--- /dev/null
+++ b/melamine/validators.py
@@ -0,0 +1,30 @@
+import os
+import platform
+import sys
+from pathlib import Path
+
+
+def validate_file_folder(value: str) -> Path:
+    file_folder_path = Path(value)
+    if not file_folder_path.exists():
+        raise FileNotFoundError(f"No such file or folder: {value}")
+    if not file_folder_path.is_file() and not file_folder_path.is_dir():
+        raise TypeError(f"Not a file or directory: {value}")
+    return file_folder_path
+
+
+def validate_logfile(value: str) -> Path:
+    logfile_path = Path(value)
+    if logfile_path.exists():
+        confirm = input(f"The file {value} already exists. Do you want to overwrite it? ([y]es/[n]o): ")
+        if confirm.lower() not in ["yes", "y"]:
+            sys.exit(f"Exiting. Log file {value} will not be overwritten.")
+    return logfile_path
+
+
+def validate_environment():
+    if platform.system() != "Linux":
+        sys.exit("Error: This script must be run on a Linux system.")
+
+    if os.getuid() != 0:
+        sys.exit("Error: This script must be run with sudo or root privileges.")
diff --git a/pyproject.toml b/pyproject.toml
index 798406a..9eb37f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,14 @@ description = "A comprehensive file shredder for Linux"
 readme = "README.md"
 requires-python = ">=3.9"
 license = { text = "MIT" }
-dependencies = ["fastapi==0.95.2"]
+dependencies = [
+    "loguru==0.7.0",
+    "aiofiles==23.1.0",
+    "uvloop==0.17.0",
+    "pyzfs==0.2.3",
+    "asyncstdlib==3.10.8",
+    "psutil==5.9.5",
+]
 
 [project.scripts]
 ghostforge_adduser = "melamine.cli:run"
@@ -24,8 +31,6 @@ py-modules = ["melamine"]
 [tool.bandit]
 exclude_dirs = ["/doc", "/build"]
 
-# TODO: Stop skipping B104 (binding on 0.0.0.0), is there a nice way to get a good docker bind address?
-skips = ["B104"]
 
 [tool.black]
 line-length = 120