Custom recurse globs and readme

This commit is contained in:
Darryl Nixon 2023-07-16 16:10:32 -07:00
parent 63d2d41587
commit 935c647624
3 changed files with 42 additions and 8 deletions

View file

@ -5,7 +5,7 @@
**melamine** is a better file shredder
for linux<br/>
it runs on linux (Ubuntu tested) for now<br/>
[Installation](#installation) •
[Example](#example) •
@ -28,7 +28,24 @@ python3 -m pip install .
## Example
todo
melamine is not yet ready for production use. If you'd like to try it, generate dummy files as shown below. Preferably, do this on a different mount point from any essential files, or in a virtual machine.
Currently, only inode discovery and the beginnings of ext2/3 and zfs support exist.
```bash
rm -rf /tmp/melamine && mkdir -p /tmp/melamine;
rm -rf /tmp/melamine_hardlinks && mkdir -p /tmp/melamine_hardlinks;
for i in $(seq 1 4); do dd if=/dev/urandom of=/tmp/melamine/test_$i bs=1 count=$(( RANDOM % 256 * 256 )); done;
ln /tmp/melamine/test_1 /tmp/melamine_hardlinks/hardlink_1;
ln /tmp/melamine/test_2 /tmp/melamine_hardlinks/hardlink_2;
```
You can then confirm that the two hardlink inodes match using `ls -li`.
```bash
ls -li /tmp/melamine/test_*;
ls -li /tmp/melamine_hardlinks/hardlink_*;
```
## Contributing

View file

@ -1,5 +1,6 @@
import asyncio
from collections.abc import Generator
from typing import List
import aiofiles
from aiopath import AsyncPath
@ -9,6 +10,7 @@ from .filesystems import FSHandlers
from .logs import logger
@lru_cache(maxsize=4096)
async def find_mount(path: AsyncPath) -> AsyncPath:
"""Find the mount point for a given path."""
path = await path.absolute()
@ -24,6 +26,24 @@ async def get_all_mounts() -> Generator:
yield line.split()[1]
async def mount_bound_rglob(path: AsyncPath, mount: AsyncPath, pattern: str, ignoredirs: List[AsyncPath]) -> Generator:
    """Recursively glob a path, but stop at mount points.

    Walks ``path`` depth-first and yields every entry that lives on
    ``mount``. Children are yielded before their parent directory, and
    ``path`` itself is yielded last.

    Args:
        path: File or directory to start from; it is made absolute first.
        mount: Mount point the walk is confined to. A directory whose mount
            point (as reported by ``find_mount``) differs is skipped along
            with everything below it.
        pattern: Glob pattern applied at each directory level (callers in
            this project pass ``"*"``).
        ignoredirs: Paths to exclude. Anything equal to or located beneath
            one of them is skipped, subtree included.

    Yields:
        Each matching path on ``mount`` that is not ignored.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    from pathlib import PurePath

    path = await path.absolute()

    # Compare on path components instead of raw string prefixes, so that
    # ignoring "/tmp/foo" does not also ignore "/tmp/foobar". A plain
    # PurePath keeps this a synchronous, I/O-free check.
    pure_path = PurePath(str(path))
    for ignore in ignoredirs:
        if pure_path.is_relative_to(str(ignore)):
            logger.info(f"Skipping ignored subdir: {path}")
            return

    if await path.is_dir():
        # Only descend into directories that belong to the wanted mount.
        if await find_mount(path) != mount:
            logger.info(f"Skipping differently mounted subdir: {path} (wanted {mount})")
            return
        # glob() is consumed with ``async for``, the same way this project
        # consumes rglob(); it must not be awaited.
        async for subpath in path.glob(pattern):
            # Forward ignoredirs so the exclusions apply at every depth.
            async for subitem in mount_bound_rglob(subpath, mount, pattern, ignoredirs):
                yield subitem
    yield path
@lru_cache(maxsize=1024)
async def mount_to_fs_handler(path: AsyncPath) -> str:
# TODO: This is a hacky way to get the filesystem type, but it works for now.

View file

@ -7,6 +7,7 @@ from aiopath import AsyncPath
from .classes import get_all_hardlinks
from .classes import ShredDir
from .classes import ShredFile
from .fileops import mount_bound_rglob
from .logs import logger
IGNORE_GLOBAL = ("/proc", "/dev", "/sys")
@ -84,17 +85,13 @@ async def main(job: argparse.Namespace) -> bool:
if await item.is_dir():
if str(item) in IGNORE_GLOBAL:
continue
async for subitem in item.rglob("*"):
if any(str(subitem).startswith(str(path)) for path in job.ignoredir):
continue
async for subitem in mount_bound_rglob(item, mount_point, "*", job.ignoredir):
tasks.append(check_inode_and_unlink(subitem, inodes))
else:
tasks.append(check_inode_and_unlink(item, inodes))
else:
logger.info(f"Checking non-root filesystem mount: {str(mount_point)}")
async for item in mount_point.rglob("*"):
if any(str(item).startswith(str(path)) for path in job.ignoredir):
continue
async for item in mount_bound_rglob(mount_point, mount_point, "*", job.ignoredir):
tasks.append(check_inode_and_unlink(item, inodes))
done, _ = await asyncio.wait(tasks)
for task in done: