Refactoring and improving find

Darryl Nixon 2023-07-16 13:51:44 -07:00
parent 0f6b8bd438
commit 79536c105e
2 changed files with 32 additions and 13 deletions

View file

@@ -155,7 +155,7 @@ class ShredFile(AsyncObject):
         if self.hardlinks:
             log_buf = f"[5/4] Unlinking {len(self.hardlinks)} hardlinks"
             if not dryrun:
-                tasks = [link.unlink() for link in self.hardlinks]
+                tasks = [link.unlink() for link in self.hardlinks if link.name != "." and link.name != ".."]
                 done, _ = await asyncio.wait(tasks)
                 for task in done:
                     e = task.exception()
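
The change above only filters the "." and ".." names out of the hardlink list before the unlink calls are scheduled. For reference, a minimal standard-library sketch of the same filter-then-unlink pattern (the unlink_hardlinks helper and plain pathlib.Path objects are illustrative stand-ins for the project's hardlink objects, whose unlink() appears to be awaitable; here the blocking pathlib unlink is pushed onto worker threads instead):

import asyncio
from pathlib import Path


async def unlink_hardlinks(hardlinks: list[Path]) -> None:
    # Skip the special directory entries, mirroring the filter added above.
    targets = [link for link in hardlinks if link.name not in (".", "..")]
    # pathlib.Path.unlink() blocks, so run each call in a worker thread.
    tasks = [asyncio.create_task(asyncio.to_thread(p.unlink, missing_ok=True)) for p in targets]
    if not tasks:
        return  # asyncio.wait() rejects an empty task collection
    done, _ = await asyncio.wait(tasks)
    for task in done:
        if (e := task.exception()) is not None:
            print(f"Unable to unlink hardlink: {e}")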

View file

@@ -27,32 +27,51 @@ async def main(job: argparse.Namespace) -> bool:
             raise TypeError(f"Not a file or directory: {path}")
     new_paths = set(await asyncio.gather(*tasks))
 
+    # Shred all physical files including hardlinks
+    for path in job.paths:
+        tasks = []
+        if isinstance(path, ShredFile):
+            tasks.append(path.shred(hash=job.exhaustive, dryrun=job.dryrun))
+        elif isinstance(path, ShredDir):
+            tasks.append(path.shred(hash=job.exhaustive, dryrun=job.dryrun))
+        done, _ = await asyncio.wait(*tasks)
+        for task in done:
+            e = task.exception()
+            if e:
+                logger.warning(f"Error raised while shredding: {e}")
+
     # Try to delete hardlinks based on the filesystem type
     job.paths = await get_all_hardlinks(new_paths)
 
     # Just in case, use "find" to delete any remaining hardlinks
-    # from the mount point
+    # from the mount point, so let's build a map of inodes to mount points
     logger.info("Deleting remaining hardlinks using find")
     inodes_in_mount_points = defaultdict(set)
     for path in job.paths:
         inodes_in_mount_points[path.mount_point].add(path.inode)
 
-    for mount_point, inodes in inodes_in_mount_points.items():
-        async for item in mount_point.rglob("*"):
-            if item.name == "." or item.name == "..":
-                continue
-            if item.stat().st_ino in inodes:
+    # We'll also limit concurrency to something reasonable since stat
+    # on an entire filesystem might be a bit burdensome
+    semaphore = asyncio.Semaphore(1024)
+
+    async def check_inode_and_unlink(item):
+        async with semaphore:
+            if await item.stat().st_ino in inodes_in_mount_points[item.mount_point]:
                 log_buf = f"Deleting hardlink: {item.path}"
                 if not job.dryrun:
                     log_buf = "DRY RUN " + log_buf
                     await item.path.unlink()
                 logger.info(log_buf)
 
-    # Shred all physical files including hardlinks
-    for path in job.paths:
-        if isinstance(path, ShredFile):
-            await path.shred(hash=job.exhaustive, dryrun=job.dryrun)
-        elif isinstance(path, ShredDir):
-            await path.shred(hash=job.exhaustive, dryrun=job.dryrun)
+    for mount_point, inodes in inodes_in_mount_points.items():
+        # checking for . and .. should not be neccessary w/ rglob
+        tasks = []
+        async for item in mount_point.rglob("*"):
+            tasks.append(check_inode_and_unlink(item))
+        done, _ = await asyncio.wait(*tasks)
+        for task in done:
+            e = task.exception()
+            if e:
+                logger.warning(f"Unable to unlink hardlink: {e}")
 
     logger.info("Done")
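
The second hunk moves the shredding pass ahead of the hardlink sweep and swaps the inline rglob loop for a semaphore-bounded helper, relying on rglob never yielding "." or "..". A self-contained sketch of that sweep pattern, using only the standard library (mount_point, inodes, and dryrun are illustrative parameters; the project's async path objects are approximated with pathlib plus asyncio.to_thread, and the dry-run branch is written the way it is presumably intended: log only on dry runs, unlink otherwise):

import asyncio
import logging
import os
from pathlib import Path

logger = logging.getLogger(__name__)


async def sweep_mount_point(mount_point: Path, inodes: set[int], dryrun: bool = True) -> None:
    # Bound concurrency so a filesystem-wide stat() sweep stays manageable.
    semaphore = asyncio.Semaphore(1024)

    async def check_inode_and_unlink(item: Path) -> None:
        async with semaphore:
            # os.stat()/os.unlink() block, so push them onto worker threads.
            st = await asyncio.to_thread(os.stat, item, follow_symlinks=False)
            if st.st_ino in inodes:
                log_buf = f"Deleting hardlink: {item}"
                if dryrun:
                    log_buf = "DRY RUN " + log_buf
                else:
                    await asyncio.to_thread(os.unlink, item)
                logger.info(log_buf)

    # rglob("*") never yields "." or "..", so no extra filtering is needed.
    tasks = [asyncio.create_task(check_inode_and_unlink(item))
             for item in mount_point.rglob("*")]
    if not tasks:
        return
    done, _ = await asyncio.wait(tasks)
    for task in done:
        if (e := task.exception()) is not None:
            logger.warning(f"Unable to unlink hardlink: {e}")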