From 9ea7fdd8d1d100f3b616ce5673bb55c034a00167 Mon Sep 17 00:00:00 2001
From: Darryl Nixon
Date: Sun, 16 Jul 2023 10:55:05 -0700
Subject: [PATCH] Improve logic for getting hardlinks asynchronously

---
 melamine/classes.py | 12 ++++++------
 melamine/fileops.py | 20 ++++++++++++++++++++
 melamine/shred.py   | 13 +++----------
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/melamine/classes.py b/melamine/classes.py
index ef59d1d..1b1e97c 100644
--- a/melamine/classes.py
+++ b/melamine/classes.py
@@ -65,7 +65,7 @@ class ShredFile:
     async def shred(self, hash: bool = False, dryrun: bool = False) -> Union[bool, bytes]:
         """Shred the file with a single file descriptor."""
         try:
-            logger.info(f"Shredding file: {self.absolute_path}")
+            logger.info(f"Shredding: {self.absolute_path}")
 
             async with aiofiles.open(self.absolute_path, "rb+") as file:
                 if hash:
@@ -76,21 +76,21 @@ class ShredFile:
                     logger.info(f"Got hash {sha1.hexdigest()}")
 
                 # First pass: Overwrite with binary zeroes
-                logger.info("Performing first pass: Overwriting with binary zeroes")
+                logger.info(f"[1/4] Writing zeroes ({self.absolute_path.name})")
                 await file.seek(0)
                 if not dryrun:
                     await file.write(b"\x00" * self.byte_size)
                 await file.flush()
 
                 # Second pass: Overwrite with binary ones
-                logger.info("Performing second pass: Overwriting with binary ones")
+                logger.info(f"[2/4] Writing ones ({self.absolute_path.name})")
                 await file.seek(0)
                 if not dryrun:
                     await file.write(b"\xff" * self.byte_size)
                 await file.flush()
 
                 # Third pass: Overwrite with random data
-                logger.info("Performing third pass: Overwriting with random data")
+                logger.info(f"[3/4] Writing randoms ({self.absolute_path.name})")
                 await file.seek(0)
                 random_data = token_bytes(self.byte_size)
                 if not dryrun:
@@ -98,14 +98,14 @@ class ShredFile:
                 await file.flush()
 
             # Remove the file
-            logger.info(f"Removing file {self.absolute_path}")
+            logger.info(f"[4/4] Unlinking {self.absolute_path}")
 
             if not dryrun:
                 file.unlink()
 
             # Remove any hardlinks
             if self.hardlinks:
-                logger.info(f"Removing {len(self.hardlinks)} hardlinks")
+                logger.info(f"[5/4] Unlinking {len(self.hardlinks)} hardlinks")
                 if not dryrun:
                     for link in self.hardlinks:
                         link.unlink()
diff --git a/melamine/fileops.py b/melamine/fileops.py
index 6b3fc35..374e1dd 100644
--- a/melamine/fileops.py
+++ b/melamine/fileops.py
@@ -1,13 +1,33 @@
 import asyncio
 from pathlib import Path
 from typing import List
+from typing import Set
 
 from asyncstdlib.functools import lru_cache
 
+from .classes import ShredDir
+from .classes import ShredFile
 from .filesystems import FSHandlers
 from .logs import logger
 
 
+async def get_all_hardlinks(paths: Set[Path]) -> Set[Path]:
+    """Populate the hardlink set for every ShredFile, recursing into ShredDirs."""
+    for path in paths:
+        if isinstance(path, ShredFile):
+            logger.info(f"Getting hardlinks for {path}")
+            hardlink_count = 0
+            path.hardlinks = set()
+            async for link in path.fs_handler.get_hardlinks(path):
+                hardlink_count += 1
+                path.hardlinks.add(link)
+                logger.info(f"Found hardlink: {link}")
+            logger.info(f"Found {hardlink_count} hardlinks for {path.absolute_path}")
+        if isinstance(path, ShredDir):
+            await get_all_hardlinks(path)
+    return paths
+
+
 def find_mount(path: Path) -> Path:
     """Find the mount point for a given path."""
     path = path.absolute()
diff --git a/melamine/shred.py b/melamine/shred.py
index a306f9a..772f13e 100644
--- a/melamine/shred.py
+++ b/melamine/shred.py
@@ -1,5 +1,6 @@
 from .classes import ShredDir
 from .classes import ShredFile
+from .fileops import get_all_hardlinks
 from .fileops import mount_to_fs_handler
 from .logs import logger
 
@@ -10,6 +11,7 @@ async def main(job) -> bool:
     It is called by the CLI and builds a job queue based on the arguments passed.
     """
     new_paths = set()
+    logger.info(f"job type is {type(job)}")
 
     # Expand all directories and files, and collect mount point information
     for path in job.paths:
@@ -28,18 +30,9 @@ async def main(job) -> bool:
             logger.info(f"Skipping directory: {path} (try -r/--recursive)")
         else:
             raise TypeError(f"Not a file or directory: {path}")
-    job.paths = new_paths
 
     # Get hardlinks to subsequently unlink for all files
-    for path in job.paths:
-        if isinstance(path, ShredFile):
-            logger.info("Getting hardlinks for {path}")
-            hardlink_count = 0
-            path.hardlinks = set()
-            async for link in path.fs_handler.get_hardlinks(path):
-                hardlink_count += 1
-                path.hardlinks.add(link)
-                logger.info(f"Found hardlink: {link}")
+    job.paths = await get_all_hardlinks(new_paths)
 
     # Shred all physical files including hardlinks
     for path in job.paths:
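--
Reviewer note (below the trailer, not part of the patch): a minimal, self-contained
sketch of the traversal pattern get_all_hardlinks implements, for anyone reviewing
the recursion. File, Dir, and iter_hardlinks are hypothetical stand-ins for
ShredFile, ShredDir, and the fs_handler.get_hardlinks coroutine; only the shape of
the loop mirrors the patch. One assumption worth verifying during review: the patch
recurses with get_all_hardlinks(path) on a ShredDir, which relies on ShredDir being
iterable over its children, whereas this sketch descends through an explicit .paths
attribute.

import asyncio
from dataclasses import dataclass, field
from pathlib import Path
from typing import AsyncIterator, List, Set, Union

@dataclass(eq=False)
class File:
    """Hypothetical stand-in for melamine.classes.ShredFile."""
    absolute_path: Path
    hardlinks: Set[Path] = field(default_factory=set)

    async def iter_hardlinks(self) -> AsyncIterator[Path]:
        # Stand-in for fs_handler.get_hardlinks: a real handler would search
        # the mount for other paths sharing this file's inode.
        yield self.absolute_path.with_name(self.absolute_path.name + ".link")

@dataclass(eq=False)
class Dir:
    """Hypothetical stand-in for melamine.classes.ShredDir."""
    paths: List[Union[File, "Dir"]]

async def collect_hardlinks(paths):
    # Same shape as get_all_hardlinks: fill each File's hardlink set,
    # recurse into directories, and hand the collection back to the caller.
    for p in paths:
        if isinstance(p, File):
            p.hardlinks = {link async for link in p.iter_hardlinks()}
        elif isinstance(p, Dir):
            await collect_hardlinks(p.paths)
    return paths

async def demo():
    tree = [File(Path("/tmp/a")), Dir([File(Path("/tmp/b"))])]
    await collect_hardlinks(tree)
    print(tree[0].hardlinks)           # {PosixPath('/tmp/a.link')}
    print(tree[1].paths[0].hardlinks)  # {PosixPath('/tmp/b.link')}

if __name__ == "__main__":
    asyncio.run(demo())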