From 068d47703aae803e5ae04056063a5a1540c0e2e8 Mon Sep 17 00:00:00 2001
From: Darryl Nixon
Date: Sun, 16 Jul 2023 12:20:57 -0700
Subject: [PATCH] ext2/3 is hard, use find as backup

---
Review notes (text in this area is not part of the commit message):
 * find's -inum argument is now str(self.inode): this patch defines
   self.inode but no get_inode(), and subprocess arguments must be strings.
 * The ShredFile success log message was missing its f-string prefix.
 * The unused stdout binding from proc.communicate() is dropped.
 * Hunk sizes and the diffstat account for the two added docstrings; the
   post-image blob ids on the "index" lines predate these corrections.
 * NOTE(review): "find <mount> -inum N -delete" also matches the file's own
   path, so it may be removed before the shred step runs -- confirm intent.
 * NOTE(review): get_all_hardlinks() is annotated "-> None" yet its result
   is assigned to job.paths and iterated -- verify against its body.

 melamine/classes.py | 33 ++++++++++++++++++++++++++++++++-
 melamine/shred.py   |  8 ++++++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/melamine/classes.py b/melamine/classes.py
index 7a1c63e..2452ca4 100644
--- a/melamine/classes.py
+++ b/melamine/classes.py
@@ -35,6 +35,7 @@ class ShredDir(AsyncObject):
         self.mount_points.add(self.mount_point)
         self.fs_handler = await mount_to_fs_handler(self.mount_point)
         self.byte_size = sum(item.byte_size for item in self.contents)
+        self.inode = path.stat().st_ino

     async def _get_contents(self) -> List:
         contents = []
@@ -63,13 +64,31 @@ class ShredDir(AsyncObject):
     def __hash__(self) -> int:
         return hash(self.absolute_path)

+    async def delete_hardlinks_by_inode(self) -> None:
+        """Run the find-based inode deletion for every item in contents, then for this directory."""
+        logger.info(f"Finding and deleting hardlinks inside {self.absolute_path.name}")
+        for path in self.contents:
+            await path.delete_hardlinks_by_inode()
+
+        proc = await asyncio.create_subprocess_exec("find", str(self.mount_point), "-inum", str(self.inode), "-delete")
+        await proc.communicate()
+
+        if proc.returncode != 0:
+            err = f"Unable to delete hardlinks for {self.absolute_path.name}"
+            logger.error(err)
+            raise RuntimeError(err)
+
+        logger.info(f"Deleted hardlink for {self.absolute_path.name}")
+

 class ShredFile(AsyncObject):
     """Class for tracking each file to be shredded."""

     async def __init__(self, path: Path) -> None:
         self.absolute_path = path.resolve().absolute()
-        self.byte_size = path.stat().st_size
+        stat = path.stat()
+        self.byte_size = stat.st_size
+        self.inode = stat.st_ino
         self.mount_point = find_mount(self.absolute_path)
         self.fs_handler = await mount_to_fs_handler(self.mount_point)
         self.hardlinks = None
@@ -145,6 +164,18 @@ class ShredFile(AsyncObject):
     def __hash__(self) -> int:
         return hash(self.absolute_path)

+    async def delete_hardlinks_by_inode(self) -> None:
+        """Run find under the mount point with -inum/-delete for this file's inode; raise RuntimeError on failure."""
+        proc = await asyncio.create_subprocess_exec("find", str(self.mount_point), "-inum", str(self.inode), "-delete")
+        await proc.communicate()
+
+        if proc.returncode != 0:
+            err = f"Unable to delete hardlinks for {self.absolute_path.name}"
+            logger.error(err)
+            raise RuntimeError(err)
+
+        logger.info(f"Deleted hardlink for {self.absolute_path.name}")
+

 async def get_all_hardlinks(paths: Set[Union[ShredFile, ShredDir]]) -> None:
     for path in paths:
diff --git a/melamine/shred.py b/melamine/shred.py
index f658cd6..0c172ca 100644
--- a/melamine/shred.py
+++ b/melamine/shred.py
@@ -12,7 +12,6 @@ async def main(job: argparse.Namespace) -> bool:
     It is called by the CLI and builds a job queue based on the arguments passed.
     """
     new_paths = set()
-    logger.info(f"job type is {type(job)}")

     # Expand all directories and files, and collect mount point information
     for path in job.paths:
@@ -28,9 +27,14 @@ async def main(job: argparse.Namespace) -> bool:
         else:
             raise TypeError(f"Not a file or directory: {path}")

-    # Get hardlinks to subsequently unlink for all files
+    # Try to delete hardlinks based on the filesystem type
     job.paths = await get_all_hardlinks(new_paths)

+    # Just in case, use "find" to delete any remaining hardlinks
+    # from the mount point
+    for path in job.paths:
+        await path.delete_hardlinks_by_inode()
+
     # Shred all physical files including hardlinks
     for path in job.paths:
         if isinstance(path, ShredFile):