mirror of https://github.com/DarrylNixon/melamine.git (synced 2024-04-22 06:27:20 -07:00)
MVP for testing
This commit is contained in:
parent 3396d2d69b
commit 9bc6f8d9e1
11 changed files with 459 additions and 3 deletions
.flake8 (Normal file, 3 additions)

@@ -0,0 +1,3 @@
[flake8]
max-line-length = 160
exclude = docs/*, .git, __pycache__, build
melamine/classes.py (Normal file, 124 additions)

@@ -0,0 +1,124 @@
import asyncio
import hashlib
from collections.abc import Generator
from pathlib import Path
from secrets import token_bytes
from typing import List
from typing import Union

import aiofiles

from .fileops import find_mount
from .logs import logger


class ShredDir:
    """Class for tracking each directory to be shredded, and its contents."""

    def __init__(self, path: Path) -> None:
        self.absolute_path = path.resolve()
        self.processed = False
        self.contents = self._get_contents()
        self.byte_size = sum(item.byte_size for item in self.contents)
        self.mount_point = find_mount(self.absolute_path)
        self.mount_points = set(self.get_mount_points())
        self.fs_handler = None

    def _get_contents(self) -> List:
        contents = []
        for subpath in self.absolute_path.glob("*"):
            if subpath.is_dir():
                if subpath.is_symlink():
                    logger.warning(f"Symlink subdirectory found: {subpath}, skipping")
                    continue
                contents.append(ShredDir(subpath))
            elif subpath.is_file():
                contents.append(ShredFile(subpath))
        return contents

    def get_mount_points(self) -> Generator:
        for item in self.contents:
            if isinstance(item, ShredDir):
                yield from item.get_mount_points()
            else:
                yield item.mount_point
        yield self.mount_point

    async def shred(self, hash: bool = False, dryrun: bool = False) -> bool:
        tasks = []
        for item in self.contents:
            tasks.append(item.shred(hash, dryrun))
        return all(await asyncio.gather(*tasks))

    def __hash__(self) -> int:
        return hash(self.absolute_path)


class ShredFile:
    """Class for tracking each file to be shredded."""

    def __init__(self, path: Path) -> None:
        self.absolute_path = path.resolve()
        self.byte_size = path.stat().st_size
        self.processed = False
        self.mount_point = find_mount(self.absolute_path)
        self.fs_handler = None
        self.hardlinks = None

    async def shred(self, hash: bool = False, dryrun: bool = False) -> Union[bool, bytes]:
        """Shred the file with a single file descriptor."""
        if not self.processed:
            logger.error(f"File {self.absolute_path} tried to shred early.")
            return False
        try:
            logger.info(f"Shredding file: {self.absolute_path}")

            async with aiofiles.open(self.absolute_path, "rb+") as file:
                if hash:
                    sha1 = hashlib.sha1(usedforsecurity=False)
                    # aiofiles has no iterate() helper; read in fixed-size chunks
                    while chunk := await file.read(65536):
                        sha1.update(chunk)
                    self.sha1 = sha1.digest()
                    logger.info(f"Got hash {sha1.hexdigest()}")

                # First pass: overwrite with binary zeroes
                logger.info("Performing first pass: Overwriting with binary zeroes")
                await file.seek(0)
                if not dryrun:
                    await file.write(b"\x00" * self.byte_size)
                    await file.flush()

                # Second pass: overwrite with binary ones
                logger.info("Performing second pass: Overwriting with binary ones")
                await file.seek(0)
                if not dryrun:
                    await file.write(b"\xff" * self.byte_size)
                    await file.flush()

                # Third pass: overwrite with random data
                logger.info("Performing third pass: Overwriting with random data")
                await file.seek(0)
                random_data = token_bytes(self.byte_size)
                if not dryrun:
                    await file.write(random_data)
                    await file.flush()

            # Remove the file (the aiofiles handle has no unlink; use the path)
            logger.info(f"Removing file {self.absolute_path}")
            if not dryrun:
                self.absolute_path.unlink()

            # Remove any hardlinks
            if self.hardlinks:
                logger.info(f"Removing {len(self.hardlinks)} hardlinks")
                if not dryrun:
                    for link in self.hardlinks:
                        link.unlink()

            return True

        except Exception as e:
            logger.error(f"File wipe failed: {e}")
            return False

    def __hash__(self) -> int:
        return hash(self.absolute_path)
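A minimal driver for ShredFile, to show the intended call order (a sketch only, assuming the package is importable as melamine; the file name is hypothetical). Note that shred() refuses to run until the processed flag is set, and that a dry run still hashes but writes nothing:

    import asyncio
    from pathlib import Path

    from melamine.classes import ShredFile

    async def demo() -> None:
        target = Path("scratch.bin")    # hypothetical test file
        target.write_bytes(b"example data")
        shred_file = ShredFile(target)
        shred_file.processed = True     # shred() bails out early without this
        ok = await shred_file.shred(hash=True, dryrun=True)
        print(ok, shred_file.sha1.hex())

    asyncio.run(demo())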
melamine/cli.py (Normal file, 56 additions)

@@ -0,0 +1,56 @@
import asyncio
import sys
from argparse import ArgumentParser

import uvloop

from .shred import main
from .validators import *


# flake8: noqa: E501
def run() -> None:
    validate_environment()

    parser = ArgumentParser(description="Comprehensive DoD 5220.22-M file shredder for Linux.")
    parser.add_argument(
        "--recursive", "-r", action="store_true", help="Process directories recursively. Default is false."
    )
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts. Default is false.")
    parser.add_argument(
        "--dryrun", "-d", action="store_true", help="Provide mock output without deleting anything. Default is false."
    )
    parser.add_argument(
        "--exhaustive",
        "-e",
        action="store_true",
        help="Exhaustively check local mounts for duplicate files by hash. Default is false.",
    )
    # shred.main() reads job.hash, so the flag has to exist on the namespace
    parser.add_argument("--hash", action="store_true", help="Record a hash of each file before shredding. Default is false.")
    parser.add_argument(
        "--ignoredir",
        "-i",
        action="append",
        type=validate_file_folder,
        default=[],
        help="Specify directories to be ignored during the process. This option can be used multiple times.",
    )
    parser.add_argument("--logfile", "-o", type=validate_logfile, help="Specify a file to log all output.")
    parser.add_argument("--quiet", "-q", action="store_true", help="Silence all output.")
    parser.add_argument("--verbose", "-v", action="store_true", help="Provide extra output for debugging.")
    parser.add_argument(
        "paths",
        nargs="+",
        type=validate_file_folder,
        help="Specify any number of existing files or directories to be processed.",
    )
    args = parser.parse_args()

    if sys.version_info >= (3, 11):
        with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
            runner.run(main(args))
    else:
        uvloop.install()
        asyncio.run(main(args))


if __name__ == "__main__":
    run()
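With the module guard above, the CLI can be exercised without installing the entry point (the path is hypothetical; root is required by validate_environment):

    sudo python -m melamine.cli --dryrun --recursive /tmp/scratch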
melamine/fileops.py (Normal file, 50 additions)

@@ -0,0 +1,50 @@
import asyncio
from pathlib import Path
from typing import List

from asyncstdlib.functools import lru_cache

from .filesystems import FSHandlers
from .logs import logger


def find_mount(path: Path) -> Path:
    """Find the mount point for a given path."""
    path = path.absolute()
    while not path.is_mount():
        path = path.parent
    return path


def get_all_mounts() -> List:
    """Get a list of all mounted filesystems."""
    mounts = []
    with open("/proc/mounts", "r") as f:
        for line in f:
            mount = line.split()[1]
            mounts.append(mount)
    return mounts


@lru_cache(maxsize=1024)
async def mount_to_fs_handler(path: Path):
    # TODO: Shelling out to stat is a hacky way to get the filesystem type,
    # but it works for now. Maybe with libblkid Python bindings?
    proc = await asyncio.create_subprocess_exec(
        "stat", "-f", "-L", "-c", "%T", str(path), stdout=asyncio.subprocess.PIPE
    )
    stdout, _ = await proc.communicate()

    if proc.returncode != 0:
        err = f"Unable to get filesystem for {path}"
        logger.error(err)
        raise RuntimeError(err)

    fs = stdout.decode().strip()

    try:
        return FSHandlers[fs]
    except KeyError:
        err = f"Unsupported filesystem: {fs}"
        logger.error(err)
        raise RuntimeError(err)
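The asyncstdlib import matters here: functools.lru_cache would cache the coroutine object itself, which can only be awaited once, while asyncstdlib's variant caches the awaited result. A standalone sketch (names hypothetical):

    import asyncio

    from asyncstdlib.functools import lru_cache

    @lru_cache(maxsize=4)
    async def fs_type(mount: str) -> str:
        print(f"probing {mount}")   # printed once per distinct mount
        await asyncio.sleep(0)      # stands in for the stat subprocess
        return "ext2/ext3"

    async def demo() -> None:
        await fs_type("/")          # miss: prints "probing /"
        await fs_type("/")          # hit: returns the cached result

    asyncio.run(demo())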
melamine/filesystems/__init__.py (Normal file, 7 additions)

@@ -0,0 +1,7 @@
from .ext23 import EXT23Handler
from .zfs import ZFSHandler

# from .btrfs import BTRFSHandler
# from .ext4 import EXT4Handler

FSHandlers = {"zfs": ZFSHandler(), "ext2/ext3": EXT23Handler()}
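The registry keys must match the output of stat -f -c %T byte for byte; GNU coreutils typically reports ext2, ext3, and ext4 mounts alike as "ext2/ext3", which is why that string is the key. A quick check mirroring the lookup in fileops.py (sketch, assuming a Linux host):

    import subprocess

    from melamine.filesystems import FSHandlers

    fs = subprocess.run(
        ["stat", "-f", "-c", "%T", "/"], capture_output=True, text=True, check=True
    ).stdout.strip()
    print(fs, "->", "supported" if fs in FSHandlers else "unsupported")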
melamine/filesystems/ext23.py (Normal file, 78 additions)

@@ -0,0 +1,78 @@
import ctypes
from collections.abc import Generator
from pathlib import Path


# ext2_filsys and ext2_inode_scan are opaque pointer typedefs in libext2fs,
# so plain void pointers stand in for them here
class ext2_inode_large(ctypes.Structure):
    _fields_ = [
        ("i_mode", ctypes.c_uint16),
        ("i_uid", ctypes.c_uint16),
        ("i_size", ctypes.c_uint32),
        ("i_atime", ctypes.c_uint32),
        ("i_ctime", ctypes.c_uint32),
        ("i_mtime", ctypes.c_uint32),
        ("i_dtime", ctypes.c_uint32),
        ("i_gid", ctypes.c_uint16),
        ("i_links_count", ctypes.c_uint16),
        ("i_blocks", ctypes.c_uint32),
        ("i_flags", ctypes.c_uint32),
        ("i_osd1", ctypes.c_uint32 * 3),
        ("i_block", ctypes.c_uint32 * 15),
        ("i_generation", ctypes.c_uint32),
        ("i_file_acl", ctypes.c_uint32),
        ("i_dir_acl", ctypes.c_uint32),
        ("i_faddr", ctypes.c_uint32),
        ("i_osd2", ctypes.c_uint8 * 12),
    ]


ext2_inode_large_p = ctypes.POINTER(ext2_inode_large)


class EXT23Handler:
    def __init__(self) -> None:
        self.fs = "ext2/ext3"
        self.libext2fs = ctypes.CDLL("libext2fs.so.2")
        self.libext2fs.ext2fs_open.restype = ctypes.c_int
        self.libext2fs.ext2fs_open.argtypes = [
            ctypes.c_char_p,                  # device name
            ctypes.c_int,                     # flags
            ctypes.c_int,                     # superblock
            ctypes.c_uint32,                  # block size
            ctypes.c_void_p,                  # io_manager
            ctypes.POINTER(ctypes.c_void_p),  # ret_fs
        ]
        self.libext2fs.ext2fs_close.argtypes = [ctypes.c_void_p]
        # ext2fs_get_next_inode(scan, &ino, &inode); the inode-number pointer
        # is part of the signature
        self.libext2fs.ext2fs_get_next_inode.argtypes = [
            ctypes.c_void_p,
            ctypes.POINTER(ctypes.c_uint32),
            ext2_inode_large_p,
        ]
        self.libext2fs.ext2fs_get_next_inode.restype = ctypes.c_int

    async def get_hardlinks(self, path: Path) -> Generator:
        path = path.resolve().absolute()
        inode = path.stat().st_ino

        # TODO: ext2fs_open wants the backing block device, not a file path
        fs = ctypes.c_void_p()
        io_manager = ctypes.c_void_p.in_dll(self.libext2fs, "unix_io_manager")
        ret = self.libext2fs.ext2fs_open(str(path).encode(), 0, 0, 0, io_manager, ctypes.byref(fs))
        if ret != 0:
            return

        scan = ctypes.c_void_p()
        ret = self.libext2fs.ext2fs_open_inode_scan(fs, 0, ctypes.byref(scan))
        if ret != 0:
            self.libext2fs.ext2fs_close(fs)
            return

        ino = ctypes.c_uint32()
        inode_large = ext2_inode_large()
        while self.libext2fs.ext2fs_get_next_inode(scan, ctypes.byref(ino), ctypes.byref(inode_large)) == 0:
            if ino.value == 0:  # a zero inode number marks the end of the scan
                break
            if ino.value == inode and inode_large.i_links_count > 1:
                # TODO: finding the other directory entries for this inode
                # needs ext2fs_dir_iterate; yield the known path for now
                yield path

        self.libext2fs.ext2fs_close_inode_scan(scan)
        self.libext2fs.ext2fs_close(fs)
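Merely detecting that extra hardlinks exist needs none of the above: st_nlink from a plain stat counts the directory entries pointing at the inode, and only locating the other entries requires scanning the filesystem. A portable sketch (example path hypothetical):

    from pathlib import Path

    def has_extra_hardlinks(path: Path) -> bool:
        # st_nlink > 1 means at least one other directory entry shares the inode
        return path.stat().st_nlink > 1

    print(has_extra_hardlinks(Path("/etc/hostname")))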
melamine/filesystems/zfs.py (Normal file, 25 additions)

@@ -0,0 +1,25 @@
from collections.abc import Generator
from pathlib import Path

import pyzfs


class ZFSHandler:
    def __init__(self) -> None:
        self.fs = "zfs"

    async def get_hardlinks(self, path: Path) -> Generator:
        path = path.resolve().absolute()
        inode = path.stat().st_ino

        zfs = pyzfs.ZFS()
        dataset = zfs.get_dataset_by_path(str(path))
        if dataset is not None:
            pool = dataset.pool
            filesystem = dataset.filesystem
            fs = pool.open(filesystem)

            for snapshot in fs.snapshots():
                for entry in snapshot.ls(str(path)):
                    if entry.inode() == inode:
                        yield Path(entry.path())
melamine/logs.py (Normal file, 35 additions)

@@ -0,0 +1,35 @@
from loguru import logger


logger.add(
    "app.log",
    format="<level><light-blue>{time:YYYY-MM-DD HH:mm:ss} | {message}</light-blue></level>",
    level="INFO",
    rotation="1 day",
    retention="30 days",
)

logger.add(
    "errors.log",
    format="<level><yellow>⚠️ {time:YYYY-MM-DD HH:mm:ss} | {message}</yellow></level>",
    level="WARNING",
    rotation="1 day",
    retention="30 days",
)

logger.add(
    "error.log",
    format="<level><red>⛔️ {time:YYYY-MM-DD HH:mm:ss} | {message}</red></level>",
    level="ERROR",
    rotation="1 day",
    retention="30 days",
)


logger.add(
    "critical.log",
    format="<level><magenta>🚨 {time:YYYY-MM-DD HH:mm:ss} | {message}</magenta></level>",
    level="CRITICAL",
    rotation="1 day",
    retention="30 days",
)
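Loguru sinks receive every record at or above their level, so one call can land in several of the files configured above. A sketch of the routing:

    from melamine.logs import logger

    logger.info("shredding started")    # app.log
    logger.warning("symlink skipped")   # app.log + errors.log
    logger.error("wipe failed")         # app.log + errors.log + error.log
    logger.critical("cannot continue")  # all four sinks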
melamine/shred.py (Normal file, 43 additions)

@@ -0,0 +1,43 @@
from .classes import ShredDir
from .classes import ShredFile
from .fileops import mount_to_fs_handler
from .logs import logger


async def main(job) -> None:
    """
    This is the main function for processing a shred request.
    It is called by the CLI and builds a job queue based on the arguments passed.
    """
    new_paths = set()

    # Expand all directories and files, and collect mount point information
    for path in job.paths:
        if path.is_file():
            logger.info(f"Adding file: {path}")
            shred_file = ShredFile(path)
            shred_file.fs_handler = await mount_to_fs_handler(shred_file.mount_point)
            new_paths.add(shred_file)
        elif path.is_dir():
            if job.recursive:
                logger.info(f"Adding directory: {path}")
                shred_dir = ShredDir(path)
                shred_dir.fs_handler = await mount_to_fs_handler(shred_dir.mount_point)
                new_paths.add(shred_dir)
            else:
                logger.info(f"Skipping directory: {path} (try -r/--recursive)")
        else:
            raise TypeError(f"Not a file or directory: {path}")
    job.paths = new_paths

    # Get hardlinks to subsequently unlink for all files
    for path in job.paths:
        if isinstance(path, ShredFile):
            path.hardlinks = set(link async for link in path.fs_handler.get_hardlinks(path.absolute_path))

    # Shred all physical files and directories, including hardlinks
    for path in job.paths:
        await path.shred(job.hash, job.dryrun)
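main() only needs an object exposing paths, recursive, hash, and dryrun attributes, so it can be driven without argparse. A sketch, assuming the file exists on a filesystem with a registered handler (the file name is hypothetical):

    import asyncio
    from pathlib import Path
    from types import SimpleNamespace

    from melamine.shred import main

    job = SimpleNamespace(
        paths=[Path("scratch.bin")], recursive=False, hash=False, dryrun=True
    )
    asyncio.run(main(job))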
melamine/validators.py (Normal file, 30 additions)

@@ -0,0 +1,30 @@
import os
import platform
import sys
from pathlib import Path


def validate_file_folder(value: str) -> Path:
    file_folder_path = Path(value)
    if not file_folder_path.exists():
        raise FileNotFoundError(f"No such file or folder: {value}")
    if not file_folder_path.is_file() and not file_folder_path.is_dir():
        raise TypeError(f"Not a file or directory: {value}")
    return file_folder_path


def validate_logfile(value: str) -> Path:
    logfile_path = Path(value)
    if logfile_path.exists():
        confirm = input(f"The file {value} already exists. Do you want to overwrite it? ([y]es/[n]o): ")
        if confirm.lower() not in ["yes", "y"]:
            sys.exit(f"Exiting. Log file {value} will not be overwritten.")
    return logfile_path


def validate_environment() -> None:
    if platform.system() != "Linux":
        sys.exit("Error: This script must be run on a Linux system.")

    if os.getuid() != 0:
        sys.exit("Error: This script must be run with sudo or root privileges.")
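One caveat when these are used as argparse type= callables: argparse converts only ArgumentTypeError, TypeError, and ValueError into a clean usage message, so the FileNotFoundError above surfaces as a raw traceback. A stricter variant (hypothetical) would wrap it:

    from argparse import ArgumentTypeError
    from pathlib import Path

    def validate_file_folder_strict(value: str) -> Path:
        file_folder_path = Path(value)
        if not file_folder_path.exists():
            # argparse renders this as "error: No such file or folder: ..."
            raise ArgumentTypeError(f"No such file or folder: {value}")
        return file_folder_path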
pyproject.toml (8 additions, 3 deletions)

@@ -10,7 +10,14 @@ description = "A comprehensive file shredder for Linux"
 readme = "README.md"
 requires-python = ">=3.9"
 license = { text = "MIT" }
-dependencies = ["fastapi==0.95.2"]
+dependencies = [
+    "loguru==0.7.0",
+    "aiofiles==23.1.0",
+    "uvloop==0.17.0",
+    "pyzfs==0.2.3",
+    "asyncstdlib==3.10.8",
+    "psutil==5.9.5",
+]

 [project.scripts]
 ghostforge_adduser = "melamine.cli:run"

@@ -24,8 +31,6 @@ py-modules = ["melamine"]

 [tool.bandit]
 exclude_dirs = ["/doc", "/build"]
-# TODO: Stop skipping B104 (binding on 0.0.0.0), is there a nice way to get a good docker bind address?
-skips = ["B104"]

 [tool.black]
 line-length = 120