diff --git a/README.md b/README.md
index b193890..a03f726 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
**melamine** is a better file shredder
-for linux
+that currently runs on Linux (tested on Ubuntu)
[Installation](#installation) •
[Example](#example) •
@@ -28,7 +28,24 @@ python3 -m pip install .
## Example
-todo
+melamine is not yet ready for production use. If you'd like to try it, generate dummy files with the commands below. Preferably, do this on a mount point separate from any essential files, or in a virtual machine.
+
+Currently, only inode discovery and the beginnings of ext2/3 and zfs support exist.
+
+```bash
+rm -rf /tmp/melamine && mkdir -p /tmp/melamine;
+rm -rf /tmp/melamine_hardlinks && mkdir -p /tmp/melamine_hardlinks;
+for i in $(seq 1 4); do dd if=/dev/urandom of=/tmp/melamine/test_$i bs=1 count=$(( RANDOM % 256 * 256 )); done;
+ln /tmp/melamine/test_1 /tmp/melamine_hardlinks/hardlink_1;
+ln /tmp/melamine/test_2 /tmp/melamine_hardlinks/hardlink_2;
+```
+
+You can then use `ls -li` to confirm that each hard link has the same inode number as the file it was created from.
+
+```bash
+ls -li /tmp/melamine/test_*;
+ls -li /tmp/melamine_hardlinks/hardlink_*;
+```
## Contributing
diff --git a/melamine/fileops.py b/melamine/fileops.py
index 5b58aca..b0be666 100644
--- a/melamine/fileops.py
+++ b/melamine/fileops.py
@@ -1,5 +1,6 @@
import asyncio
from collections.abc import Generator
+from typing import List
import aiofiles
from aiopath import AsyncPath
@@ -9,6 +10,7 @@ from .filesystems import FSHandlers
from .logs import logger
+@lru_cache(maxsize=4096)
async def find_mount(path: AsyncPath) -> AsyncPath:
"""Find the mount point for a given path."""
path = await path.absolute()
@@ -24,6 +26,24 @@ async def get_all_mounts() -> Generator:
yield line.split()[1]
+async def mount_bound_rglob(path: AsyncPath, mount: AsyncPath, pattern: str, ignoredirs: List[AsyncPath]) -> Generator:
+ """Recursively glob a path, but stop at mount points."""
+ path = await path.absolute()
+ # Skip any ignored directories/files with a sanity check
+ for ignore in ignoredirs:
+ if str(path).startswith(str(ignore)) and await path.is_relative_to(ignore):
+ logger.info(f"Skipping ignored subdir: {path}")
+ return
+ if await path.is_dir():
+ if await find_mount(path) == mount:
+ logger.info(f"Skipping differently mounted subdir: {path} (wanted {mount}))")
+ return
+ for subpath in await path.glob(pattern):
+ async for subitem in mount_bound_rglob(subpath, mount, pattern):
+ yield subitem
+ yield path
+
+
@lru_cache(maxsize=1024)
async def mount_to_fs_handler(path: AsyncPath) -> str:
# TODO: This is a hacky way to get the filesystem type, but it works for now.
diff --git a/melamine/shred.py b/melamine/shred.py
index 2fff49e..00cf4c5 100644
--- a/melamine/shred.py
+++ b/melamine/shred.py
@@ -7,6 +7,7 @@ from aiopath import AsyncPath
from .classes import get_all_hardlinks
from .classes import ShredDir
from .classes import ShredFile
+from .fileops import mount_bound_rglob
from .logs import logger
IGNORE_GLOBAL = ("/proc", "/dev", "/sys")
@@ -84,17 +85,13 @@ async def main(job: argparse.Namespace) -> bool:
if await item.is_dir():
if str(item) in IGNORE_GLOBAL:
continue
- async for subitem in item.rglob("*"):
- if any(str(subitem).startswith(str(path)) for path in job.ignoredir):
- continue
+ async for subitem in mount_bound_rglob(item, mount_point, "*", job.ignoredir):
tasks.append(check_inode_and_unlink(subitem, inodes))
else:
tasks.append(check_inode_and_unlink(item, inodes))
else:
logger.info(f"Checking non-root filesystem mount: {str(mount_point)}")
- async for item in mount_point.rglob("*"):
- if any(str(item).startswith(str(path)) for path in job.ignoredir):
- continue
+ async for item in mount_bound_rglob(mount_point, mount_point, "*", job.ignoredir):
tasks.append(check_inode_and_unlink(item, inodes))
done, _ = await asyncio.wait(tasks)
for task in done: