melamine/melamine/shred.py

import argparse
import asyncio
from collections import defaultdict
from aiopath import AsyncPath
from .classes import get_all_hardlinks
from .classes import ShredDir
from .classes import ShredFile
from .fileops import mount_bound_rglob
from .logs import logger
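
# Virtual filesystem mount points that are always skipped, both when shredding
# and when sweeping for leftover hardlinks.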
IGNORE_GLOBAL = ("/proc", "/dev", "/sys", "/run")


async def main(job: argparse.Namespace) -> bool:
    """
    This is the main function for processing a shred request.
    It is called by the CLI and builds a job queue based on the arguments passed.
    """
    # Expand all directories and files, and collect mount point information
    tasks = []
    for path in job.paths:
        if await path.is_file():
            logger.info(f"Adding file: {path}")
            tasks.append(ShredFile(path))
        elif await path.is_dir():
            logger.info(f"Adding directory: {path}")
            tasks.append(ShredDir(path, recursive=job.recursive))
        else:
            raise TypeError(f"Not a file or directory: {path}")
    new_paths = set(await asyncio.gather(*tasks))

    # Find any hardlinks to these paths (how depends on the filesystem type)
    # so they can be removed as well
    job.paths = await get_all_hardlinks(new_paths)

    # Resolve the ignored directories to absolute paths, including the global ignore list
    tasks = [path.absolute() for path in job.ignoredir]
    for path in IGNORE_GLOBAL:
        tasks.append(AsyncPath(path).absolute())
    job.ignoredir = set(await asyncio.gather(*tasks))

    # Shred all physical files, including hardlinks
    for path in job.paths:
        tasks = []
        if isinstance(path, (ShredFile, ShredDir)):
            # Wrap in a Task so asyncio.wait() accepts it on current Python versions
            tasks.append(
                asyncio.create_task(
                    path.shred(hash=job.exhaustive, dryrun=job.dryrun, ignoredirs=job.ignoredir)
                )
            )
        done, _ = await asyncio.wait(tasks)
        for task in done:
            e = task.exception()
            if e:
                logger.warning(f"Error raised while shredding: {e}")

    # Just in case, sweep each mount point (find-style) for any remaining
    # hardlinks, so build a map of inodes per mount point first
    logger.info("Deleting remaining hardlinks using find")
    inodes_in_mount_points = defaultdict(set)
    for path in job.paths:
        inodes_in_mount_points[path.mount_point].add(path.inode)

    # Limit concurrency to something reasonable, since stat()ing an entire
    # filesystem can be burdensome
    semaphore = asyncio.Semaphore(1024)

    async def check_inode_and_unlink(item, inodes):
        async with semaphore:
            try:
                stat = await item.stat()
                if stat.st_ino in inodes:
                    log_buf = f"Deleting hardlink: {item.path}"
                    if job.dryrun:
                        # Dry run: report the hardlink but leave it in place
                        log_buf = "DRY RUN " + log_buf
                    else:
                        await item.path.unlink()
                    logger.info(log_buf)
            except FileNotFoundError:
                pass

    for mount_point, inodes in inodes_in_mount_points.items():
        # Checking for . and .. should not be necessary with rglob.
        # scandir/glob/rglob don't play nice with FileNotFound errors,
        # so avoid them in dynamic filesystem areas.
        if str(mount_point) == "/":
            logger.info("Root filesystem mount processing")
            async for item in mount_point.glob("*"):
                if await item.is_dir():
                    if str(item) in IGNORE_GLOBAL:
                        continue
                    async for subitem in mount_bound_rglob(item, mount_point, "*", job.ignoredir):
                        # Leftover debug logging for a local test directory
                        abso = await item.absolute()
                        if str(abso).startswith("/home/parallels/hardlinks"):
                            logger.warning(f"Scanning {abso}")
                            logger.warning(f"inodes are {inodes}")
                            no = await item.stat()
                            logger.warning(f"{abso} inode is {no.st_ino}")
                            logger.warning(f"is {abso} in inodes? {no.st_ino in inodes}")
                        await check_inode_and_unlink(subitem, inodes)
                else:
                    await check_inode_and_unlink(item, inodes)
        else:
            logger.info(f"Checking non-root filesystem mount: {mount_point}")
            async for item in mount_bound_rglob(mount_point, mount_point, "*", job.ignoredir):
                await check_inode_and_unlink(item, inodes)

    logger.info("Done")
    # The signature declares a bool return, so report success explicitly
    return True
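

# Illustrative sketch only, not the project's actual CLI wiring: main() expects an
# argparse.Namespace carrying the fields read above (paths, recursive, exhaustive,
# dryrun, ignoredir). A hypothetical invocation could look like:
#
#     ns = argparse.Namespace(
#         paths=[AsyncPath("/tmp/example-to-shred")],  # hypothetical target path
#         recursive=True,
#         exhaustive=False,
#         dryrun=True,
#         ignoredir=[],
#     )
#     asyncio.run(main(ns))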