From 0d61346b3c45b12bfb63c39f28eec95c08971d2b Mon Sep 17 00:00:00 2001
From: Darryl Nixon
Date: Sun, 16 Jul 2023 17:34:02 -0700
Subject: [PATCH] mvp

---
Review notes (text between the three-dash line and the diff is not part of
the commit message):

- ShredFile.get_inodes now yields instead of returning. shred.py consumes
  path.get_inodes(...) with `async for`, which needs an async generator; an
  `async def` that only returns produces a coroutine, and `async for` raises
  TypeError on it.
- The "/home/parallels" guard around the "Processing ..." log line was tied
  to one machine's home directory; it is now an unconditional debug-level
  message.
- Hunk counts, the second classes.py hunk offset and the diffstat are updated
  for the added docstrings/comments. The post-image blob ids on the `index`
  lines are carried over from the original patch and no longer describe the
  result.
- NOTE(review): line breaks and leading whitespace were reconstructed from
  the Python structure. The depth of the first three context lines in
  classes.py and of every line in the shred.py hunks is a best guess --
  check it against the tree before applying.

 melamine/classes.py | 15 +++++++++++++++
 melamine/fileops.py |  9 ++++++---
 melamine/shred.py   |  4 +++-
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/melamine/classes.py b/melamine/classes.py
index b272334..76b8496 100644
--- a/melamine/classes.py
+++ b/melamine/classes.py
@@ -53,6 +53,16 @@ class ShredDir(AsyncObject):
             elif await subpath.is_file():
                 tasks.append(ShredFile(subpath))
         return set(await asyncio.gather(*tasks))
+
+    async def get_inodes(self, recursive: bool) -> Generator:
+        """Yield the inode of each contained file; recurse into subdirs if ``recursive``."""
+        for path in self.contents:
+            if isinstance(path, ShredDir):
+                if recursive:
+                    async for sub_inode in path.get_inodes(recursive):
+                        yield sub_inode
+            elif isinstance(path, ShredFile):
+                yield path.inode
 
     def enumerate_mount_points(self) -> Generator:
         for item in self.contents:
@@ -196,6 +206,11 @@ class ShredFile(AsyncObject):
     def __hash__(self) -> int:
         return hash(self.absolute_path)
 
+    async def get_inodes(self, recursive: bool) -> Generator:
+        """Yield this file's inode; ``recursive`` is unused, kept to match ShredDir."""
+        # Must be an async generator: callers iterate the result with ``async for``.
+        yield self.inode
+
     async def delete_hardlinks_by_inode(self) -> None:
         proc = await asyncio.create_subprocess_exec("find", str(self.mount_point), "-inum", str(self.inode), "-delete")
         stdout, _ = await proc.communicate()
diff --git a/melamine/fileops.py b/melamine/fileops.py
index 72212a2..08c03fa 100644
--- a/melamine/fileops.py
+++ b/melamine/fileops.py
@@ -29,15 +29,18 @@ async def get_all_mounts() -> Generator:
 async def mount_bound_rglob(path: AsyncPath, mount: AsyncPath, pattern: str, ignoredirs: List[AsyncPath]) -> Generator:
     """Recursively glob a path, but stop at mount points."""
     path = await path.absolute()
+    logger.debug(f"Processing {path}")
+    # Compare mounts for every path (files included), not only for directories.
+    path_mount = await find_mount(path)
+    if path_mount != mount:
+        logger.info(f"Skipping due to mount diff: {path} (wanted {mount})")
+        return
     # Skip any ignored directories/files with a sanity check
     for ignore in ignoredirs:
         if str(path).startswith(str(ignore)) and await path.is_relative_to(ignore):
             logger.info(f"Skipping ignored subdir: {path}")
             return
     if await path.is_dir():
-        if await find_mount(path) != mount:
-            logger.info(f"Skipping differently mounted subdir: {path} (wanted {mount})")
-            return
         async for subpath in path.glob(pattern):
             async for subitem in mount_bound_rglob(subpath, mount, pattern, ignoredirs):
                 yield subitem
diff --git a/melamine/shred.py b/melamine/shred.py
index 1a0980e..b918489 100644
--- a/melamine/shred.py
+++ b/melamine/shred.py
@@ -58,7 +58,8 @@ async def main(job: argparse.Namespace) -> bool:
     logger.info("Deleting remaining hardlinks using find")
     inodes_in_mount_points = defaultdict(set)
     for path in job.paths:
-        inodes_in_mount_points[path.mount_point].add(path.inode)
+        async for inode in path.get_inodes(job.recursive):
+            inodes_in_mount_points[path.mount_point].add(inode)
 
     # We'll also limit concurrency to something reasonable since stat
     # on an entire filesystem might be a bit burdensome
@@ -81,6 +82,7 @@ async def main(job: argparse.Namespace) -> bool:
         # checking for . and .. should not be neccessary w/ rglob
         # scandir/glob/rglob doesn't play nice with FileNotFound errors,
         # so let's avoid them in dynamic fs areas
+        logger.warning(f"Scanning {mount_point} for inodes: {inodes}")
         if str(mount_point) == "/":
             logger.info("Root filesystem mount processing")
             async for item in mount_point.glob("*"):