From 055ed8a87a68de1b7218689a0fb0a51a3ac66807 Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Mon, 22 Nov 2021 13:00:45 -0600 Subject: [PATCH] Added cleaning fuction --- config.yml.sample | 4 ++- requirements.txt | 5 ++-- tibi_hardlinks/__init__.py | 50 +++++++++++++++++++++++++++++++++----- tibi_hardlinks/backups.py | 12 ++++++--- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/config.yml.sample b/config.yml.sample index 603fa29..f3113a4 100644 --- a/config.yml.sample +++ b/config.yml.sample @@ -4,6 +4,8 @@ input: - "~/tilde/is/supported" output: output_root: "/path/should/be/on/the/same/filesystem" - +cleaning: + min_days_to_keep: 6 + target_size: 40G customization: time_format: "%Y-%m-%d_%H-%M-%S" diff --git a/requirements.txt b/requirements.txt index a01f88b..888ada8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ -pytz -python-dateutil PyYAML appdirs +humanfriendly +python-dateutil +pytz reflink diff --git a/tibi_hardlinks/__init__.py b/tibi_hardlinks/__init__.py index 1dad76b..73f5fb7 100644 --- a/tibi_hardlinks/__init__.py +++ b/tibi_hardlinks/__init__.py @@ -1,6 +1,7 @@ from argparse import ArgumentParser from pathlib import Path +import humanfriendly import yaml from tibi_hardlinks.config import CONFIG_DATA, CONFIG_FILE_PATH @@ -8,12 +9,17 @@ from tibi_hardlinks.backups import Backup, create_backup_directory_name from tibi_hardlinks.cache import cache_manager -def find_all_property_files(): +def find_all_property_files_in_backup_path(): for path in map(Path, CONFIG_DATA["input"]["backup_paths"]): if path.exists(): yield from path.absolute().glob("*.properties") +def find_all_property_files_in_backup_root(): + root = Path(CONFIG_DATA["output"]["output_root"]).absolute() + yield from root.glob("**/*.properties") + + def hardlink_all_files(files, dry=False): for prop_file in files: try: @@ -36,13 +42,14 @@ def hardlink_all_files(files, dry=False): backup.hardlink_to(dest_dir, dry) -def clean_out(backups: list[Backup], before_date, target_size): +def get_backups_to_remove(backups: list[Backup], before_date, target_size): + before_date = before_date.timestamp() total_size = sum(backup.size for backup in backups) - target_removal_size = target_size - total_size + target_removal_size = total_size - target_size if target_removal_size <= 0: return () backups_eligable = ( - backup for backup in backups if backup.properties["backup_time"] <= before_date + backup for backup in backups if backup.backup_info["backup_time"] <= before_date ) sorted_backups_eligable = sorted( backups_eligable, key=lambda backup: backup.size, reverse=True @@ -52,7 +59,22 @@ def clean_out(backups: list[Backup], before_date, target_size): to_remove.append(backup) target_removal_size -= backup.size if target_removal_size <= 0: - return to_remove + break + + return to_remove + + +def clean_out(property_files, days_before, target_size, dry=True): + before_date = datetime.datetime.utcnow() - datetime.timedelta(days=days_before) + to_remove = get_backups_to_remove( + list(map(Backup, property_files)), before_date, target_size + ) + for backup in to_remove: + path_to_remove = backup.property_file.parent + if dry: + print(f"remove '{path_to_remove}'") + else: + shutil.rmtree(path_to_remove) def main(): @@ -70,6 +92,13 @@ def main(): action="store_true", help="Clear the cache entirely", ) + subparsers = parser.add_subparsers(dest="action") + subparsers.add_parser( + "link", + ) + subparsers.add_parser( + "clean", + ) args = parser.parse_args() if args.dump_config: print(CONFIG_FILE_PATH) @@ -79,4 +108,13 @@ def main(): cache_manager.invalidate_all() elif args.clear_already_processed: cache_manager.invalidate_already_processed() - hardlink_all_files(find_all_property_files(), args.dry) + + if args.action == "link": + hardlink_all_files(find_all_property_files_in_backup_path(), args.dry) + elif args.action == "clean": + clean_out( + find_all_property_files_in_backup_root(), + CONFIG_DATA["cleaning"]["min_days_to_keep"], + humanfriendly.parse_size(CONFIG_DATA["cleaning"]["target_size"], True), + args.dry, + ) diff --git a/tibi_hardlinks/backups.py b/tibi_hardlinks/backups.py index 07764f0..17931bd 100644 --- a/tibi_hardlinks/backups.py +++ b/tibi_hardlinks/backups.py @@ -97,13 +97,17 @@ class Backup: self.property_file = property_file self.backup_info = read_backup_properties(property_file) self.related_files = find_backup_data(self.property_file, self.backup_info) + self._size = None @property def size(self): - total = 0 - for f in self.related_files.values(): - total += f.stat().st_size - return total + if self._size is None: + total = 0 + for f in self.related_files.values(): + if f is not None: + total += f.stat().st_size + self._size = total + return self._size def hardlink_to(self, dest_dir, dry): for filepath in self.related_files.values():