diff --git a/README.md b/README.md index b193890..a03f726 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ **melamine** is a better file shredder -for linux
+it runs on Linux (tested on Ubuntu) for now
[Installation](#installation) • [Example](#example) • @@ -28,7 +28,24 @@ python3 -m pip install . ## Example -todo +melamine is not yet ready for production use. If you'd like to try it, generate dummy files with the commands below. Preferably, do this on a different mount point from any essential files, or in a virtual machine. + +Currently, only inode discovery and the beginnings of ext2/3 and zfs support exist. + +```bash +rm -rf /tmp/melamine && mkdir -p /tmp/melamine; +rm -rf /tmp/melamine_hardlinks && mkdir -p /tmp/melamine_hardlinks; +for i in $(seq 1 4); do dd if=/dev/urandom of=/tmp/melamine/test_$i bs=1 count=$(( RANDOM % 256 * 256 )); done; +ln /tmp/melamine/test_1 /tmp/melamine_hardlinks/hardlink_1; +ln /tmp/melamine/test_2 /tmp/melamine_hardlinks/hardlink_2; +``` + +You can then confirm that the two hardlink inodes match using `ls -li`. + +```bash +ls -li /tmp/melamine/test_*; +ls -li /tmp/melamine_hardlinks/hardlink_*; +``` ## Contributing diff --git a/melamine/fileops.py b/melamine/fileops.py index 5b58aca..b0be666 100644 --- a/melamine/fileops.py +++ b/melamine/fileops.py @@ -1,5 +1,6 @@ import asyncio from collections.abc import Generator +from typing import List import aiofiles from aiopath import AsyncPath @@ -9,6 +10,7 @@ from .filesystems import FSHandlers from .logs import logger +@lru_cache(maxsize=4096) async def find_mount(path: AsyncPath) -> AsyncPath: """Find the mount point for a given path.""" path = await path.absolute() @@ -24,6 +26,24 @@ async def get_all_mounts() -> Generator: yield line.split()[1] +async def mount_bound_rglob(path: AsyncPath, mount: AsyncPath, pattern: str, ignoredirs: List[AsyncPath]) -> Generator: + """Recursively glob a path, but stop at mount points.""" + path = await path.absolute() + # Skip any ignored directories/files with a sanity check + for ignore in ignoredirs: + if str(path).startswith(str(ignore)) and await path.is_relative_to(ignore): + logger.info(f"Skipping ignored subdir: {path}") + return + if await path.is_dir(): + 
if await find_mount(path) != mount:  # only descend while still on the requested mount + logger.info(f"Skipping differently mounted subdir: {path} (wanted {mount})") + return + async for subpath in path.glob(pattern): + async for subitem in mount_bound_rglob(subpath, mount, pattern, ignoredirs): + yield subitem + yield path + + @lru_cache(maxsize=1024) async def mount_to_fs_handler(path: AsyncPath) -> str: # TODO: This is a hacky way to get the filesystem type, but it works for now. diff --git a/melamine/shred.py b/melamine/shred.py index 2fff49e..00cf4c5 100644 --- a/melamine/shred.py +++ b/melamine/shred.py @@ -7,6 +7,7 @@ from aiopath import AsyncPath from .classes import get_all_hardlinks from .classes import ShredDir from .classes import ShredFile +from .fileops import mount_bound_rglob from .logs import logger IGNORE_GLOBAL = ("/proc", "/dev", "/sys") @@ -84,17 +85,13 @@ async def main(job: argparse.Namespace) -> bool: if await item.is_dir(): if str(item) in IGNORE_GLOBAL: continue - async for subitem in item.rglob("*"): - if any(str(subitem).startswith(str(path)) for path in job.ignoredir): - continue + async for subitem in mount_bound_rglob(item, mount_point, "*", job.ignoredir): tasks.append(check_inode_and_unlink(subitem, inodes)) else: tasks.append(check_inode_and_unlink(item, inodes)) else: logger.info(f"Checking non-root filesystem mount: {str(mount_point)}") - async for item in mount_point.rglob("*"): - if any(str(item).startswith(str(path)) for path in job.ignoredir): - continue + async for item in mount_bound_rglob(mount_point, mount_point, "*", job.ignoredir): tasks.append(check_inode_and_unlink(item, inodes)) done, _ = await asyncio.wait(tasks) for task in done: