diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..dd0767d
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 160
+exclude = docs/*, .git, __pycache__, build
diff --git a/melamine/classes.py b/melamine/classes.py
new file mode 100644
index 0000000..b2ddf66
--- /dev/null
+++ b/melamine/classes.py
@@ -0,0 +1,124 @@
+import asyncio
+import hashlib
+from collections.abc import Generator
+from pathlib import Path
+from secrets import token_bytes
+from typing import List
+from typing import Union
+
+import aiofiles
+
+from .fileops import find_mount
+from .logs import logger
+
+
+class ShredDir:
+ """Class for tracking each directory to be shredded, and its contents."""
+
+ def __init__(self, path: Path) -> None:
+ self.absolute_path = path.resolve()
+ self.processed = False
+ self.contents = self._get_contents()
+ self.byte_size = sum(item.byte_size for item in self.contents)
+        self.mount_point = find_mount(self.absolute_path)
+        self.mount_points = set(self.get_mount_points())
+ self.fs_handler = None
+
+ def _get_contents(self) -> List:
+ contents = []
+ for subpath in self.absolute_path.glob("*"):
+ if subpath.is_dir():
+ if subpath.is_symlink():
+ logger.warning(f"Symlink subdirectory found: {subpath}, skipping")
+ continue
+ contents.append(ShredDir(subpath))
+            elif subpath.is_file():
+                if subpath.is_symlink():
+                    logger.warning(f"Symlink file found: {subpath}, skipping")
+                    continue
+                contents.append(ShredFile(subpath))
+ return contents
+
+    def get_mount_points(self) -> Generator:
+        """Yield this directory's mount point and those of everything below it."""
+        yield self.mount_point
+        for item in self.contents:
+            if isinstance(item, ShredDir):
+                yield from item.get_mount_points()
+            elif isinstance(item, ShredFile):
+                yield item.mount_point
+
+ async def shred(self, hash: bool = False, dryrun: bool = False) -> bool:
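+        # Fan the contents out concurrently; asyncio.gather collects the
+        # per-item results and all() folds them into a single pass/fail.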
+ tasks = []
+        for item in self.contents:
+            item.processed = True  # contents are processed as part of the parent directory
+            tasks.append(item.shred(hash, dryrun))
+ return all(await asyncio.gather(*tasks))
+
+ def __hash__(self) -> int:
+ return hash(self.absolute_path)
+
+
+class ShredFile:
+ """Class for tracking each file to be shredded."""
+
+ def __init__(self, path: Path) -> None:
+ self.absolute_path = path.resolve()
+ self.byte_size = path.stat().st_size
+ self.processed = False
+ self.mount_point = find_mount(self.absolute_path)
+ self.fs_handler = None
+ self.hardlinks = None
+
+ async def shred(self, hash: bool = False, dryrun: bool = False) -> Union[bool, bytes]:
+ """Shred the file with a single file descriptor."""
+        if not self.processed:
+            logger.error(f"Refusing to shred {self.absolute_path}: it has not been processed yet.")
+            return False
+ try:
+ logger.info(f"Shredding file: {self.absolute_path}")
+
+ async with aiofiles.open(self.absolute_path, "rb+") as file:
+ if hash:
+ sha1 = hashlib.sha1(usedforsecurity=False)
+                    while chunk := await file.read(64 * 1024):
+                        sha1.update(chunk)
+ self.sha1 = sha1.digest()
+ logger.info(f"Got hash {sha1.hexdigest()}")
+
+ # First pass: Overwrite with binary zeroes
+ logger.info("Performing first pass: Overwriting with binary zeroes")
+ await file.seek(0)
+ if not dryrun:
+ await file.write(b"\x00" * self.byte_size)
+ await file.flush()
+
+ # Second pass: Overwrite with binary ones
+ logger.info("Performing second pass: Overwriting with binary ones")
+ await file.seek(0)
+ if not dryrun:
+ await file.write(b"\xff" * self.byte_size)
+ await file.flush()
+
+ # Third pass: Overwrite with random data
+ logger.info("Performing third pass: Overwriting with random data")
+ await file.seek(0)
+ random_data = token_bytes(self.byte_size)
+ if not dryrun:
+ await file.write(random_data)
+ await file.flush()
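+                    # NOTE: flush() only drains Python's userspace buffer; if
+                    # the passes must reach the disk before the unlink,
+                    # something like os.fsync(file.fileno()) would be the
+                    # conservative addition here.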
+
+ # Remove the file
+ logger.info(f"Removing file {self.absolute_path}")
+
+            if not dryrun:
+                self.absolute_path.unlink()
+
+ # Remove any hardlinks
+ if self.hardlinks:
+ logger.info(f"Removing {len(self.hardlinks)} hardlinks")
+ if not dryrun:
+ for link in self.hardlinks:
+ link.unlink()
+
+ return True
+
+ except Exception as e:
+ logger.error(f"File wipe failed: {e}")
+ return False
+
+ def __hash__(self) -> int:
+ return hash(self.absolute_path)
diff --git a/melamine/cli.py b/melamine/cli.py
index e69de29..d2dda72 100644
--- a/melamine/cli.py
+++ b/melamine/cli.py
@@ -0,0 +1,56 @@
+import asyncio
+import sys
+from argparse import ArgumentParser
+
+import uvloop
+
+from .shred import main
+from .validators import validate_environment
+from .validators import validate_file_folder
+from .validators import validate_logfile
+
+
+def run() -> None:
+ validate_environment()
+
+ parser = ArgumentParser(description="Comprehensive DoD 5220.22-M file shredder for Linux.")
+ parser.add_argument(
+ "--recursive", "-r", action="store_true", help="Process directories recursively. Default is false."
+ )
+ parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts. Default is false.")
+ parser.add_argument(
+ "--dryrun", "-d", action="store_true", help="Provide mock output without deleting anything. Default is false."
+ )
+ parser.add_argument(
+ "--exhaustive",
+ "-e",
+ action="store_true",
+ help="Exhaustively check local mounts for duplicate files by hash. Default is false.",
+ )
+ parser.add_argument(
+ "--ignoredir",
+ "-i",
+ action="append",
+ type=validate_file_folder,
+ default=[],
+ help="Specify directories to be ignored during the process. This option can be used multiple times.",
+ )
+ parser.add_argument("--logfile", "-o", type=validate_logfile, help="Specify a file to log all output.")
+ parser.add_argument("--quiet", "-q", action="store_true", help="Silence all output.")
+ parser.add_argument("--verbose", "-v", action="store_true", help="Provide extra output for debugging.")
+ parser.add_argument(
+ "paths",
+ nargs="+",
+ type=validate_file_folder,
+ help="Specify any number of existing files or directories to be processed.",
+ )
+ args = parser.parse_args()
+
+ if sys.version_info >= (3, 11):
+ with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
+ runner.run(main(args))
+ else:
+ uvloop.install()
+ asyncio.run(main(args))
+
+
+if __name__ == "__main__":
+ run()
diff --git a/melamine/fileops.py b/melamine/fileops.py
new file mode 100644
index 0000000..6b3fc35
--- /dev/null
+++ b/melamine/fileops.py
@@ -0,0 +1,50 @@
+import asyncio
+from pathlib import Path
+from typing import List
+
+from asyncstdlib.functools import lru_cache
+
+from .filesystems import FSHandlers
+from .logs import logger
+
+
+def find_mount(path: Path) -> Path:
+ """Find the mount point for a given path."""
+ path = path.absolute()
+ while not path.is_mount():
+ path = path.parent
+ return path
+
+
+def get_all_mounts() -> List:
+ """Get a list of all mounted filesystems."""
+ mounts = []
+ with open("/proc/mounts", "r") as f:
+ for line in f:
+ mount = line.split()[1]
+ mounts.append(mount)
+ return mounts
+
+
+@lru_cache(maxsize=1024)
+async def mount_to_fs_handler(path: Path) -> str:
+ # TODO: This is a hacky way to get the filesystem type, but it works for now.
+ # Maybe with libblkid Python bindings?
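+    # "stat -f -L -c %T PATH" prints the filesystem type name, e.g. "zfs" or
+    # "ext2/ext3" (coreutils reports ext2/3/4 under that one name), which is
+    # exactly the key format the FSHandlers table uses.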
+    proc = await asyncio.create_subprocess_exec(
+        "stat", "-f", "-L", "-c", "%T", str(path), stdout=asyncio.subprocess.PIPE
+    )
+ stdout, _ = await proc.communicate()
+
+ if proc.returncode != 0:
+ err = f"Unable to get filesystem for {path}"
+ logger.error(err)
+ raise RuntimeError(err)
+
+ fs = stdout.decode().strip()
+
+ try:
+ return FSHandlers[fs]
+ except KeyError:
+ err = f"Unsupported filesystem: {fs}"
+ logger.error(err)
+ raise RuntimeError(err)
diff --git a/melamine/filesystems/__init__.py b/melamine/filesystems/__init__.py
new file mode 100644
index 0000000..a2bad44
--- /dev/null
+++ b/melamine/filesystems/__init__.py
@@ -0,0 +1,7 @@
+from .ext23 import EXT23Handler
+from .zfs import ZFSHandler
+
+# from .btrfs import BTRFSHandler
+# from .ext4 import EXT4Handler
+
+FSHandlers = {"zfs": ZFSHandler(), "ext2/ext3": EXT23Handler()}
diff --git a/melamine/filesystems/ext23.py b/melamine/filesystems/ext23.py
new file mode 100644
index 0000000..bb82d2d
--- /dev/null
+++ b/melamine/filesystems/ext23.py
@@ -0,0 +1,78 @@
+import ctypes
+from collections.abc import Generator
+from pathlib import Path
+
+
+class ext2_filsys(ctypes.Structure):
+ pass
+
+
+class ext2_inode_scan(ctypes.Structure):
+ pass
+
+
+class ext2_inode_large(ctypes.Structure):
+ _fields_ = [
+ ("i_mode", ctypes.c_uint16),
+ ("i_uid", ctypes.c_uint16),
+ ("i_size", ctypes.c_uint32),
+ ("i_atime", ctypes.c_uint32),
+ ("i_ctime", ctypes.c_uint32),
+ ("i_mtime", ctypes.c_uint32),
+ ("i_dtime", ctypes.c_uint32),
+ ("i_gid", ctypes.c_uint16),
+ ("i_links_count", ctypes.c_uint16),
+ ("i_blocks", ctypes.c_uint32),
+ ("i_flags", ctypes.c_uint32),
+ ("i_osd1", ctypes.c_uint32 * 3),
+ ("i_block", ctypes.c_uint32 * 15),
+ ("i_generation", ctypes.c_uint32),
+ ("i_file_acl", ctypes.c_uint32),
+ ("i_dir_acl", ctypes.c_uint32),
+ ("i_faddr", ctypes.c_uint32),
+ ("i_osd2", ctypes.c_uint8 * 12),
+ ]
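+# The field layout above mirrors the classic 128-byte struct ext2_inode from
+# <ext2fs/ext2_fs.h>; widths here are assumptions to verify against the
+# installed e2fsprogs headers before trusting offsets like i_links_count.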
+
+
+ext2_inode_large_p = ctypes.POINTER(ext2_inode_large)
+
+
+class EXT23Handler:
+    def __init__(self) -> None:
+        self.fs = "ext2/ext3"
+ self.libext2fs = ctypes.CDLL("libext2fs.so.2")
+        self.libext2fs.ext2fs_open.restype = ctypes.c_int
+        self.libext2fs.ext2fs_open.argtypes = [
+            ctypes.c_char_p,
+            ctypes.c_int,
+            ctypes.c_int,
+            ctypes.c_uint32,
+            ctypes.c_void_p,  # io_manager, e.g. the exported unix_io_manager
+            ctypes.POINTER(ext2_filsys),
+        ]
+ self.libext2fs.ext2fs_close.argtypes = [ext2_filsys]
+        self.libext2fs.ext2fs_get_next_inode.argtypes = [
+            ext2_inode_scan,
+            ctypes.POINTER(ctypes.c_uint32),  # ext2_ino_t *ino
+            ext2_inode_large_p,
+        ]
+        self.libext2fs.ext2fs_get_next_inode.restype = ctypes.c_int
+
+ async def get_hardlinks(self, path: Path) -> Generator:
+ path = path.resolve().absolute()
+ inode = path.stat().st_ino
+
+ fs = ext2_filsys()
+        io_manager = ctypes.c_void_p.in_dll(self.libext2fs, "unix_io_manager")
+        # TODO: ext2fs_open wants the block device backing the filesystem, not
+        # the file's own path.
+        ret = self.libext2fs.ext2fs_open(str(path).encode(), 0, 0, 0, io_manager, ctypes.byref(fs))
+        if ret != 0:
+            return
+
+ scan = ext2_inode_scan()
+        ret = self.libext2fs.ext2fs_open_inode_scan(fs, 0, ctypes.byref(scan))
+        if ret != 0:
+            self.libext2fs.ext2fs_close(fs)
+            return
+
+        ino = ctypes.c_uint32()
+        inode_large = ext2_inode_large()
+        while self.libext2fs.ext2fs_get_next_inode(scan, ctypes.byref(ino), ctypes.byref(inode_large)) == 0:
+            if ino.value == 0:  # libext2fs signals end-of-scan with inode 0
+                break
+            if inode_large.i_links_count > 1 and ino.value == inode:
+                # TODO: resolve ino to a real path (e.g. ext2fs_get_pathname)
+                yield Path(fs.fs_mount_point) / scan.name.decode()
+
+ self.libext2fs.ext2fs_close_inode_scan(scan)
+ self.libext2fs.ext2fs_close(fs)
diff --git a/melamine/filesystems/zfs.py b/melamine/filesystems/zfs.py
new file mode 100644
index 0000000..8b4c6c6
--- /dev/null
+++ b/melamine/filesystems/zfs.py
@@ -0,0 +1,25 @@
+from collections.abc import Generator
+from pathlib import Path
+
+import pyzfs
+
+
+class ZFSHandler:
+    def __init__(self) -> None:
+        self.fs = "zfs"
+
+ async def get_hardlinks(self, path: Path) -> Generator:
+ path = path.resolve().absolute()
+ inode = path.stat().st_ino
+
+ zfs = pyzfs.ZFS()
+ dataset = zfs.get_dataset_by_path(str(path))
+ if dataset is not None:
+ pool = dataset.pool
+ filesystem = dataset.filesystem
+ fs = pool.open(filesystem)
+
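+            # Snapshots are immutable, so an overwritten file can survive there
+            # verbatim; walk them and surface any entry sharing the target inode.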
+ for snapshot in fs.snapshots():
+ for entry in snapshot.ls(str(path)):
+ if entry.inode() == inode:
+ yield Path(entry.path())
diff --git a/melamine/logs.py b/melamine/logs.py
new file mode 100644
index 0000000..b6006ad
--- /dev/null
+++ b/melamine/logs.py
@@ -0,0 +1,35 @@
+from loguru import logger
+
+
+logger.add(
+ "app.log",
+ format="{time:YYYY-MM-DD HH:mm:ss} | {message}",
+ level="INFO",
+ rotation="1 day",
+ retention="30 days",
+)
+
+logger.add(
+    "warning.log",
+    format="⚠️ {time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="WARNING",
+    rotation="1 day",
+    retention="30 days",
+)
+
+logger.add(
+    "error.log",
+    format="❗️ {time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="ERROR",
+    rotation="1 day",
+    retention="30 days",
+)
+
+logger.add(
+    "critical.log",
+    format="🚨 {time:YYYY-MM-DD HH:mm:ss} | {message}",
+    level="CRITICAL",
+    rotation="1 day",
+    retention="30 days",
+)
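+
+# Sinks are cumulative by level: logger.warning(...) lands in app.log and
+# warning.log, while logger.critical(...) lands in all four files.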
diff --git a/melamine/shred.py b/melamine/shred.py
new file mode 100644
index 0000000..375c0fc
--- /dev/null
+++ b/melamine/shred.py
@@ -0,0 +1,43 @@
+from .classes import ShredDir
+from .classes import ShredFile
+from .fileops import mount_to_fs_handler
+from .logs import logger
+
+
+async def main(job) -> bool:
+ """
+ This is the main function for processing a shred request.
+ It is called by the CLI and builds a job queue based on the arguments passed.
+ """
+ new_paths = set()
+
+ # Expand all directories and files, and collect mount point information
+ for path in job.paths:
+ if path.is_file():
+ logger.info(f"Adding file: {path}")
+ shred_file = ShredFile(path)
+            shred_file.fs_handler = await mount_to_fs_handler(shred_file.mount_point)
+ new_paths.add(shred_file)
+ elif path.is_dir():
+ if job.recursive:
+ logger.info(f"Adding directory: {path}")
+ shred_dir = ShredDir(path)
+                shred_dir.fs_handler = await mount_to_fs_handler(shred_dir.mount_point)
+ new_paths.add(shred_dir)
+ else:
+ logger.info(f"Skipping directory: {path} (try -r/--recursive)")
+ else:
+ raise TypeError(f"Not a file or directory: {path}")
+ job.paths = new_paths
+
+    # Gather the hardlinks to unlink alongside each file, then mark it processed
+    for path in job.paths:
+        if isinstance(path, ShredFile):
+            path.hardlinks = set(link async for link in path.fs_handler.get_hardlinks(path.absolute_path))
+            path.processed = True
+
+    # Shred all physical files, including their hardlinks
+    results = []
+    for path in job.paths:
+        results.append(await path.shred(job.hash, job.dryrun))
+    return all(results)
diff --git a/melamine/validators.py b/melamine/validators.py
new file mode 100644
index 0000000..c300069
--- /dev/null
+++ b/melamine/validators.py
@@ -0,0 +1,30 @@
+import os
+import platform
+import sys
+from argparse import ArgumentTypeError
+from pathlib import Path
+
+
+def validate_file_folder(value: str) -> Path:
+ file_folder_path = Path(value)
+ if not file_folder_path.exists():
+        raise ArgumentTypeError(f"No such file or folder: {value}")
+ if not file_folder_path.is_file() and not file_folder_path.is_dir():
+        raise ArgumentTypeError(f"Not a file or directory: {value}")
+ return file_folder_path
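+# validate_file_folder doubles as an argparse `type=` callback (see cli.py);
+# raising ArgumentTypeError is what lets argparse report a clean usage error
+# instead of a bare traceback.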
+
+
+def validate_logfile(value: str) -> Path:
+ logfile_path = Path(value)
+ if logfile_path.exists():
+ confirm = input(f"The file {value} already exists. Do you want to overwrite it? ([y]es/[n]o): ")
+ if confirm.lower() not in ["yes", "y"]:
+ sys.exit(f"Exiting. Log file {value} will not be overwritten.")
+ return logfile_path
+
+
+def validate_environment():
+ if platform.system() != "Linux":
+ sys.exit("Error: This script must be run on a Linux system.")
+
+ if os.getuid() != 0:
+ sys.exit("Error: This script must be run with sudo or root privileges.")
diff --git a/pyproject.toml b/pyproject.toml
index 798406a..9eb37f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,14 @@ description = "A comprehensive file shredder for Linux"
readme = "README.md"
requires-python = ">=3.9"
license = { text = "MIT" }
-dependencies = ["fastapi==0.95.2"]
+dependencies = [
+ "loguru==0.7.0",
+ "aiofiles==23.1.0",
+ "uvloop==0.17.0",
+ "pyzfs==0.2.3",
+ "asyncstdlib==3.10.8",
+ "psutil==5.9.5",
+]
[project.scripts]
ghostforge_adduser = "melamine.cli:run"
@@ -24,8 +31,6 @@ py-modules = ["melamine"]
[tool.bandit]
exclude_dirs = ["/doc", "/build"]
-# TODO: Stop skipping B104 (binding on 0.0.0.0), is there a nice way to get a good docker bind address?
-skips = ["B104"]
[tool.black]
line-length = 120