From 79536c105e61ab1d3e350d856c35aa972babf744 Mon Sep 17 00:00:00 2001 From: Darryl Nixon Date: Sun, 16 Jul 2023 13:51:44 -0700 Subject: [PATCH] Refactoring and improving find --- melamine/classes.py | 2 +- melamine/shred.py | 43 +++++++++++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/melamine/classes.py b/melamine/classes.py index 58fefca..6dc917a 100644 --- a/melamine/classes.py +++ b/melamine/classes.py @@ -155,7 +155,7 @@ class ShredFile(AsyncObject): if self.hardlinks: log_buf = f"[5/4] Unlinking {len(self.hardlinks)} hardlinks" if not dryrun: - tasks = [link.unlink() for link in self.hardlinks] + tasks = [link.unlink() for link in self.hardlinks if link.name != "." and link.name != ".."] done, _ = await asyncio.wait(tasks) for task in done: e = task.exception() diff --git a/melamine/shred.py b/melamine/shred.py index a0bf80d..fd55310 100644 --- a/melamine/shred.py +++ b/melamine/shred.py @@ -27,32 +27,51 @@ async def main(job: argparse.Namespace) -> bool: raise TypeError(f"Not a file or directory: {path}") new_paths = set(await asyncio.gather(*tasks)) + # Shred all physical files including hardlinks + for path in job.paths: + tasks = [] + if isinstance(path, ShredFile): + tasks.append(path.shred(hash=job.exhaustive, dryrun=job.dryrun)) + elif isinstance(path, ShredDir): + tasks.append(path.shred(hash=job.exhaustive, dryrun=job.dryrun)) + done, _ = await asyncio.wait(*tasks) + for task in done: + e = task.exception() + if e: + logger.warning(f"Error raised while shredding: {e}") + # Try to delete hardlinks based on the filesystem type job.paths = await get_all_hardlinks(new_paths) # Just in case, use "find" to delete any remaining hardlinks - # from the mount point + # from the mount point, so let's build a map of inodes to mount points logger.info("Deleting remaining hardlinks using find") inodes_in_mount_points = defaultdict(set) for path in job.paths: 
inodes_in_mount_points[path.mount_point].add(path.inode) - for mount_point, inodes in inodes_in_mount_points.items(): - async for item in mount_point.rglob("*"): - if item.name == "." or item.name == "..": - continue - if item.stat().st_ino in inodes: + # We'll also limit concurrency to something reasonable since stat + # on an entire filesystem might be a bit burdensome + semaphore = asyncio.Semaphore(1024) + + async def check_inode_and_unlink(item): + async with semaphore: + if await item.stat().st_ino in inodes_in_mount_points[item.mount_point]: log_buf = f"Deleting hardlink: {item.path}" if not job.dryrun: log_buf = "DRY RUN " + log_buf await item.path.unlink() logger.info(log_buf) - # Shred all physical files including hardlinks - for path in job.paths: - if isinstance(path, ShredFile): - await path.shred(hash=job.exhaustive, dryrun=job.dryrun) - elif isinstance(path, ShredDir): - await path.shred(hash=job.exhaustive, dryrun=job.dryrun) + for mount_point, inodes in inodes_in_mount_points.items(): + # checking for . and .. should not be necessary w/ rglob + tasks = [] + async for item in mount_point.rglob("*"): + tasks.append(check_inode_and_unlink(item)) + done, _ = await asyncio.wait(*tasks) + for task in done: + e = task.exception() + if e: + logger.warning(f"Unable to unlink hardlink: {e}") logger.info("Done")