This commit is contained in:
Darryl Nixon 2023-07-16 17:34:02 -07:00
parent 2a773f6f1b
commit 0d61346b3c
3 changed files with 21 additions and 4 deletions

View file

@ -54,6 +54,15 @@ class ShredDir(AsyncObject):
tasks.append(ShredFile(subpath))
return set(await asyncio.gather(*tasks))
async def get_inodes(self, recursive: bool) -> Generator:
    """Asynchronously yield inode values for the entries in ``self.contents``.

    Entries that are ``ShredFile`` instances contribute their ``inode``
    attribute. Entries that are ``ShredDir`` instances are walked through
    their own ``get_inodes`` only when ``recursive`` is true; otherwise they
    contribute nothing. Entries of any other type are ignored.

    Args:
        recursive: Whether to descend into nested ``ShredDir`` entries.

    Yields:
        The ``inode`` attribute of each file reached by the walk.
    """
    for entry in self.contents:
        # Directories are tested first, mirroring the original branch order.
        if isinstance(entry, ShredDir):
            if not recursive:
                continue
            async for nested_inode in entry.get_inodes(recursive):
                yield nested_inode
            continue
        if isinstance(entry, ShredFile):
            yield entry.inode
def enumerate_mount_points(self) -> Generator:
for item in self.contents:
if isinstance(item, ShredDir):
@ -196,6 +205,9 @@ class ShredFile(AsyncObject):
def __hash__(self) -> int:
    """Return the hash of this object's ``absolute_path`` attribute."""
    identity_key = self.absolute_path
    return hash(identity_key)
async def get_inodes(self, recursive: bool) -> Generator:
    """Asynchronously yield this object's ``inode`` attribute exactly once.

    This is an async generator so that callers can consume it with
    ``async for``, the same way the directory-side ``get_inodes`` is
    consumed. The previous ``return self.inode`` made the call produce a
    plain coroutine, which ``async for`` rejects with ``TypeError``.

    Args:
        recursive: Accepted for signature parity with the directory-side
            ``get_inodes``; it is not used here.

    Yields:
        The value of ``self.inode``.
    """
    yield self.inode
async def delete_hardlinks_by_inode(self) -> None:
proc = await asyncio.create_subprocess_exec("find", str(self.mount_point), "-inum", str(self.inode), "-delete")
stdout, _ = await proc.communicate()

View file

@ -29,15 +29,18 @@ async def get_all_mounts() -> Generator:
async def mount_bound_rglob(path: AsyncPath, mount: AsyncPath, pattern: str, ignoredirs: List[AsyncPath]) -> Generator:
"""Recursively glob a path, but stop at mount points."""
path = await path.absolute()
if str(path).startswith("/home/parallels"):
logger.warning(f"Processing {str(path)}")
path_mount = await find_mount(path)
if path_mount != mount:
logger.info(f"Skipping due to mount diff: {path} (wanted {mount})")
return
# Skip any ignored directories/files with a sanity check
for ignore in ignoredirs:
if str(path).startswith(str(ignore)) and await path.is_relative_to(ignore):
logger.info(f"Skipping ignored subdir: {path}")
return
if await path.is_dir():
if await find_mount(path) != mount:
logger.info(f"Skipping differently mounted subdir: {path} (wanted {mount})")
return
async for subpath in path.glob(pattern):
async for subitem in mount_bound_rglob(subpath, mount, pattern, ignoredirs):
yield subitem

View file

@ -58,7 +58,8 @@ async def main(job: argparse.Namespace) -> bool:
logger.info("Deleting remaining hardlinks using find")
inodes_in_mount_points = defaultdict(set)
for path in job.paths:
inodes_in_mount_points[path.mount_point].add(path.inode)
async for inode in path.get_inodes(job.recursive):
inodes_in_mount_points[path.mount_point].add(inode)
# We'll also limit concurrency to something reasonable since stat
# on an entire filesystem might be a bit burdensome
@ -81,6 +82,7 @@ async def main(job: argparse.Namespace) -> bool:
# checking for . and .. should not be necessary w/ rglob
# scandir/glob/rglob doesn't play nice with FileNotFound errors,
# so let's avoid them in dynamic fs areas
logger.warning(f"Scanning {mount_point} for inodes: {inodes}")
if str(mount_point) == "/":
logger.info("Root filesystem mount processing")
async for item in mount_point.glob("*"):